LLVM 20.0.0git
RISCVISelLowering.cpp
Go to the documentation of this file.
1//===-- RISCVISelLowering.cpp - RISC-V DAG Lowering Implementation -------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that RISC-V uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "RISCVISelLowering.h"
16#include "RISCV.h"
19#include "RISCVRegisterInfo.h"
20#include "RISCVSubtarget.h"
21#include "RISCVTargetMachine.h"
22#include "llvm/ADT/SmallSet.h"
23#include "llvm/ADT/Statistic.h"
36#include "llvm/IR/IRBuilder.h"
38#include "llvm/IR/IntrinsicsRISCV.h"
41#include "llvm/Support/Debug.h"
47#include <optional>
48
49using namespace llvm;
50
51#define DEBUG_TYPE "riscv-lower"
52
// -stats counter in the DEBUG_TYPE ("riscv-lower") category; incremented
// elsewhere in this file — presumably when a call is lowered as a tail call.
STATISTIC(NumTailCalls, "Number of tail calls");
54
56 DEBUG_TYPE "-ext-max-web-size", cl::Hidden,
57 cl::desc("Give the maximum size (in number of nodes) of the web of "
58 "instructions that we will consider for VW expansion"),
59 cl::init(18));
60
61static cl::opt<bool>
62 AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden,
63 cl::desc("Allow the formation of VW_W operations (e.g., "
64 "VWADD_W) with splat constants"),
65 cl::init(false));
66
68 DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden,
69 cl::desc("Set the minimum number of repetitions of a divisor to allow "
70 "transformation to multiplications by the reciprocal"),
71 cl::init(2));
72
73static cl::opt<int>
75 cl::desc("Give the maximum number of instructions that we will "
76 "use for creating a floating-point immediate value"),
77 cl::init(2));
78
80 const RISCVSubtarget &STI)
81 : TargetLowering(TM), Subtarget(STI) {
82
83 RISCVABI::ABI ABI = Subtarget.getTargetABI();
84 assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
85
86 if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
87 !Subtarget.hasStdExtF()) {
88 errs() << "Hard-float 'f' ABI can't be used for a target that "
89 "doesn't support the F instruction set extension (ignoring "
90 "target-abi)\n";
92 } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
93 !Subtarget.hasStdExtD()) {
94 errs() << "Hard-float 'd' ABI can't be used for a target that "
95 "doesn't support the D instruction set extension (ignoring "
96 "target-abi)\n";
98 }
99
100 switch (ABI) {
101 default:
102 report_fatal_error("Don't know how to lower this ABI");
111 break;
112 }
113
114 MVT XLenVT = Subtarget.getXLenVT();
115
116 // Set up the register classes.
117 addRegisterClass(XLenVT, &RISCV::GPRRegClass);
118
119 if (Subtarget.hasStdExtZfhmin())
120 addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
121 if (Subtarget.hasStdExtZfbfmin())
122 addRegisterClass(MVT::bf16, &RISCV::FPR16RegClass);
123 if (Subtarget.hasStdExtF())
124 addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
125 if (Subtarget.hasStdExtD())
126 addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
127 if (Subtarget.hasStdExtZhinxmin())
128 addRegisterClass(MVT::f16, &RISCV::GPRF16RegClass);
129 if (Subtarget.hasStdExtZfinx())
130 addRegisterClass(MVT::f32, &RISCV::GPRF32RegClass);
131 if (Subtarget.hasStdExtZdinx()) {
132 if (Subtarget.is64Bit())
133 addRegisterClass(MVT::f64, &RISCV::GPRRegClass);
134 else
135 addRegisterClass(MVT::f64, &RISCV::GPRPairRegClass);
136 }
137
138 static const MVT::SimpleValueType BoolVecVTs[] = {
139 MVT::nxv1i1, MVT::nxv2i1, MVT::nxv4i1, MVT::nxv8i1,
140 MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
141 static const MVT::SimpleValueType IntVecVTs[] = {
142 MVT::nxv1i8, MVT::nxv2i8, MVT::nxv4i8, MVT::nxv8i8, MVT::nxv16i8,
143 MVT::nxv32i8, MVT::nxv64i8, MVT::nxv1i16, MVT::nxv2i16, MVT::nxv4i16,
144 MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
145 MVT::nxv4i32, MVT::nxv8i32, MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
146 MVT::nxv4i64, MVT::nxv8i64};
147 static const MVT::SimpleValueType F16VecVTs[] = {
148 MVT::nxv1f16, MVT::nxv2f16, MVT::nxv4f16,
149 MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
150 static const MVT::SimpleValueType BF16VecVTs[] = {
151 MVT::nxv1bf16, MVT::nxv2bf16, MVT::nxv4bf16,
152 MVT::nxv8bf16, MVT::nxv16bf16, MVT::nxv32bf16};
153 static const MVT::SimpleValueType F32VecVTs[] = {
154 MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
155 static const MVT::SimpleValueType F64VecVTs[] = {
156 MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};
157 static const MVT::SimpleValueType VecTupleVTs[] = {
158 MVT::riscv_nxv1i8x2, MVT::riscv_nxv1i8x3, MVT::riscv_nxv1i8x4,
159 MVT::riscv_nxv1i8x5, MVT::riscv_nxv1i8x6, MVT::riscv_nxv1i8x7,
160 MVT::riscv_nxv1i8x8, MVT::riscv_nxv2i8x2, MVT::riscv_nxv2i8x3,
161 MVT::riscv_nxv2i8x4, MVT::riscv_nxv2i8x5, MVT::riscv_nxv2i8x6,
162 MVT::riscv_nxv2i8x7, MVT::riscv_nxv2i8x8, MVT::riscv_nxv4i8x2,
163 MVT::riscv_nxv4i8x3, MVT::riscv_nxv4i8x4, MVT::riscv_nxv4i8x5,
164 MVT::riscv_nxv4i8x6, MVT::riscv_nxv4i8x7, MVT::riscv_nxv4i8x8,
165 MVT::riscv_nxv8i8x2, MVT::riscv_nxv8i8x3, MVT::riscv_nxv8i8x4,
166 MVT::riscv_nxv8i8x5, MVT::riscv_nxv8i8x6, MVT::riscv_nxv8i8x7,
167 MVT::riscv_nxv8i8x8, MVT::riscv_nxv16i8x2, MVT::riscv_nxv16i8x3,
168 MVT::riscv_nxv16i8x4, MVT::riscv_nxv32i8x2};
169
170 if (Subtarget.hasVInstructions()) {
171 auto addRegClassForRVV = [this](MVT VT) {
172 // Disable the smallest fractional LMUL types if ELEN is less than
173 // RVVBitsPerBlock.
174 unsigned MinElts = RISCV::RVVBitsPerBlock / Subtarget.getELen();
175 if (VT.getVectorMinNumElements() < MinElts)
176 return;
177
178 unsigned Size = VT.getSizeInBits().getKnownMinValue();
179 const TargetRegisterClass *RC;
181 RC = &RISCV::VRRegClass;
182 else if (Size == 2 * RISCV::RVVBitsPerBlock)
183 RC = &RISCV::VRM2RegClass;
184 else if (Size == 4 * RISCV::RVVBitsPerBlock)
185 RC = &RISCV::VRM4RegClass;
186 else if (Size == 8 * RISCV::RVVBitsPerBlock)
187 RC = &RISCV::VRM8RegClass;
188 else
189 llvm_unreachable("Unexpected size");
190
191 addRegisterClass(VT, RC);
192 };
193
194 for (MVT VT : BoolVecVTs)
195 addRegClassForRVV(VT);
196 for (MVT VT : IntVecVTs) {
197 if (VT.getVectorElementType() == MVT::i64 &&
198 !Subtarget.hasVInstructionsI64())
199 continue;
200 addRegClassForRVV(VT);
201 }
202
203 if (Subtarget.hasVInstructionsF16Minimal())
204 for (MVT VT : F16VecVTs)
205 addRegClassForRVV(VT);
206
207 if (Subtarget.hasVInstructionsBF16Minimal())
208 for (MVT VT : BF16VecVTs)
209 addRegClassForRVV(VT);
210
211 if (Subtarget.hasVInstructionsF32())
212 for (MVT VT : F32VecVTs)
213 addRegClassForRVV(VT);
214
215 if (Subtarget.hasVInstructionsF64())
216 for (MVT VT : F64VecVTs)
217 addRegClassForRVV(VT);
218
219 if (Subtarget.useRVVForFixedLengthVectors()) {
220 auto addRegClassForFixedVectors = [this](MVT VT) {
221 MVT ContainerVT = getContainerForFixedLengthVector(VT);
222 unsigned RCID = getRegClassIDForVecVT(ContainerVT);
223 const RISCVRegisterInfo &TRI = *Subtarget.getRegisterInfo();
224 addRegisterClass(VT, TRI.getRegClass(RCID));
225 };
227 if (useRVVForFixedLengthVectorVT(VT))
228 addRegClassForFixedVectors(VT);
229
231 if (useRVVForFixedLengthVectorVT(VT))
232 addRegClassForFixedVectors(VT);
233 }
234
235 addRegisterClass(MVT::riscv_nxv1i8x2, &RISCV::VRN2M1RegClass);
236 addRegisterClass(MVT::riscv_nxv1i8x3, &RISCV::VRN3M1RegClass);
237 addRegisterClass(MVT::riscv_nxv1i8x4, &RISCV::VRN4M1RegClass);
238 addRegisterClass(MVT::riscv_nxv1i8x5, &RISCV::VRN5M1RegClass);
239 addRegisterClass(MVT::riscv_nxv1i8x6, &RISCV::VRN6M1RegClass);
240 addRegisterClass(MVT::riscv_nxv1i8x7, &RISCV::VRN7M1RegClass);
241 addRegisterClass(MVT::riscv_nxv1i8x8, &RISCV::VRN8M1RegClass);
242 addRegisterClass(MVT::riscv_nxv2i8x2, &RISCV::VRN2M1RegClass);
243 addRegisterClass(MVT::riscv_nxv2i8x3, &RISCV::VRN3M1RegClass);
244 addRegisterClass(MVT::riscv_nxv2i8x4, &RISCV::VRN4M1RegClass);
245 addRegisterClass(MVT::riscv_nxv2i8x5, &RISCV::VRN5M1RegClass);
246 addRegisterClass(MVT::riscv_nxv2i8x6, &RISCV::VRN6M1RegClass);
247 addRegisterClass(MVT::riscv_nxv2i8x7, &RISCV::VRN7M1RegClass);
248 addRegisterClass(MVT::riscv_nxv2i8x8, &RISCV::VRN8M1RegClass);
249 addRegisterClass(MVT::riscv_nxv4i8x2, &RISCV::VRN2M1RegClass);
250 addRegisterClass(MVT::riscv_nxv4i8x3, &RISCV::VRN3M1RegClass);
251 addRegisterClass(MVT::riscv_nxv4i8x4, &RISCV::VRN4M1RegClass);
252 addRegisterClass(MVT::riscv_nxv4i8x5, &RISCV::VRN5M1RegClass);
253 addRegisterClass(MVT::riscv_nxv4i8x6, &RISCV::VRN6M1RegClass);
254 addRegisterClass(MVT::riscv_nxv4i8x7, &RISCV::VRN7M1RegClass);
255 addRegisterClass(MVT::riscv_nxv4i8x8, &RISCV::VRN8M1RegClass);
256 addRegisterClass(MVT::riscv_nxv8i8x2, &RISCV::VRN2M1RegClass);
257 addRegisterClass(MVT::riscv_nxv8i8x3, &RISCV::VRN3M1RegClass);
258 addRegisterClass(MVT::riscv_nxv8i8x4, &RISCV::VRN4M1RegClass);
259 addRegisterClass(MVT::riscv_nxv8i8x5, &RISCV::VRN5M1RegClass);
260 addRegisterClass(MVT::riscv_nxv8i8x6, &RISCV::VRN6M1RegClass);
261 addRegisterClass(MVT::riscv_nxv8i8x7, &RISCV::VRN7M1RegClass);
262 addRegisterClass(MVT::riscv_nxv8i8x8, &RISCV::VRN8M1RegClass);
263 addRegisterClass(MVT::riscv_nxv16i8x2, &RISCV::VRN2M2RegClass);
264 addRegisterClass(MVT::riscv_nxv16i8x3, &RISCV::VRN3M2RegClass);
265 addRegisterClass(MVT::riscv_nxv16i8x4, &RISCV::VRN4M2RegClass);
266 addRegisterClass(MVT::riscv_nxv32i8x2, &RISCV::VRN2M4RegClass);
267 }
268
269 // Compute derived properties from the register classes.
271
273
275 MVT::i1, Promote);
276 // DAGCombiner can call isLoadExtLegal for types that aren't legal.
278 MVT::i1, Promote);
279
280 // TODO: add all necessary setOperationAction calls.
281 setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Expand);
282
283 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
284 setOperationAction(ISD::BR_CC, XLenVT, Expand);
285 setOperationAction(ISD::BRCOND, MVT::Other, Custom);
287
292 if (!(Subtarget.hasVendorXCValu() && !Subtarget.is64Bit())) {
295 }
296
297 setOperationAction({ISD::STACKSAVE, ISD::STACKRESTORE}, MVT::Other, Expand);
298
299 setOperationAction(ISD::VASTART, MVT::Other, Custom);
300 setOperationAction({ISD::VAARG, ISD::VACOPY, ISD::VAEND}, MVT::Other, Expand);
301
303
305
306 if (!Subtarget.hasStdExtZbb() && !Subtarget.hasVendorXTHeadBb() &&
307 !(Subtarget.hasVendorXCValu() && !Subtarget.is64Bit()))
308 setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::i8, MVT::i16}, Expand);
309
310 if (Subtarget.is64Bit()) {
312
313 setOperationAction(ISD::LOAD, MVT::i32, Custom);
315 MVT::i32, Custom);
317 if (!Subtarget.hasStdExtZbb())
320 Custom);
322 }
323 if (!Subtarget.hasStdExtZmmul()) {
325 } else if (Subtarget.is64Bit()) {
328 } else {
330 }
331
332 if (!Subtarget.hasStdExtM()) {
334 Expand);
335 } else if (Subtarget.is64Bit()) {
337 {MVT::i8, MVT::i16, MVT::i32}, Custom);
338 }
339
342 Expand);
343
345 Custom);
346
347 if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) {
348 if (Subtarget.is64Bit())
350 } else if (Subtarget.hasVendorXTHeadBb()) {
351 if (Subtarget.is64Bit())
354 } else if (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit()) {
356 } else {
358 }
359
360 // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
361 // pattern match it directly in isel.
363 (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
364 Subtarget.hasVendorXTHeadBb())
365 ? Legal
366 : Expand);
367
368 if (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit()) {
370 } else {
371 // Zbkb can use rev8+brev8 to implement bitreverse.
373 Subtarget.hasStdExtZbkb() ? Custom : Expand);
374 }
375
376 if (Subtarget.hasStdExtZbb() ||
377 (Subtarget.hasVendorXCValu() && !Subtarget.is64Bit())) {
379 Legal);
380 }
381
382 if (Subtarget.hasStdExtZbb() ||
383 (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit())) {
384 if (Subtarget.is64Bit())
386 } else {
388 }
389
390 if (Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
391 (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit())) {
392 // We need the custom lowering to make sure that the resulting sequence
393 // for the 32bit case is efficient on 64bit targets.
394 if (Subtarget.is64Bit())
396 } else {
398 }
399
400 if (Subtarget.hasVendorXCValu() && !Subtarget.is64Bit()) {
402 } else if (Subtarget.hasShortForwardBranchOpt()) {
403 // We can use PseudoCCSUB to implement ABS.
405 } else if (Subtarget.is64Bit()) {
407 }
408
409 if (!Subtarget.hasVendorXTHeadCondMov())
411
412 static const unsigned FPLegalNodeTypes[] = {
413 ISD::FMINNUM, ISD::FMAXNUM, ISD::FMINIMUMNUM,
414 ISD::FMAXIMUMNUM, ISD::LRINT, ISD::LLRINT,
415 ISD::LROUND, ISD::LLROUND, ISD::STRICT_LRINT,
420
421 static const ISD::CondCode FPCCToExpand[] = {
425
426 static const unsigned FPOpToExpand[] = {
427 ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW,
428 ISD::FREM};
429
430 static const unsigned FPRndMode[] = {
431 ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FRINT, ISD::FROUND,
432 ISD::FROUNDEVEN};
433
434 static const unsigned ZfhminZfbfminPromoteOps[] = {
435 ISD::FMINNUM, ISD::FMAXNUM, ISD::FMAXIMUMNUM,
436 ISD::FMINIMUMNUM, ISD::FADD, ISD::FSUB,
441 ISD::SETCC, ISD::FCEIL, ISD::FFLOOR,
442 ISD::FTRUNC, ISD::FRINT, ISD::FROUND,
443 ISD::FROUNDEVEN};
444
445 if (Subtarget.hasStdExtZfbfmin()) {
446 setOperationAction(ISD::BITCAST, MVT::i16, Custom);
450 setOperationAction(ISD::BR_CC, MVT::bf16, Expand);
451 setOperationAction(ZfhminZfbfminPromoteOps, MVT::bf16, Promote);
453 setOperationAction(ISD::FABS, MVT::bf16, Custom);
454 setOperationAction(ISD::FNEG, MVT::bf16, Custom);
458 }
459
460 if (Subtarget.hasStdExtZfhminOrZhinxmin()) {
461 if (Subtarget.hasStdExtZfhOrZhinx()) {
462 setOperationAction(FPLegalNodeTypes, MVT::f16, Legal);
463 setOperationAction(FPRndMode, MVT::f16,
464 Subtarget.hasStdExtZfa() ? Legal : Custom);
466 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f16,
467 Subtarget.hasStdExtZfa() ? Legal : Custom);
468 if (Subtarget.hasStdExtZfa())
470 } else {
471 setOperationAction(ZfhminZfbfminPromoteOps, MVT::f16, Promote);
472 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f16, Promote);
473 for (auto Op : {ISD::LROUND, ISD::LLROUND, ISD::LRINT, ISD::LLRINT,
476 setOperationAction(Op, MVT::f16, Custom);
477 setOperationAction(ISD::FABS, MVT::f16, Custom);
478 setOperationAction(ISD::FNEG, MVT::f16, Custom);
482 }
483
484 setOperationAction(ISD::BITCAST, MVT::i16, Custom);
485
488 setCondCodeAction(FPCCToExpand, MVT::f16, Expand);
491 setOperationAction(ISD::BR_CC, MVT::f16, Expand);
492
494 ISD::FNEARBYINT, MVT::f16,
495 Subtarget.hasStdExtZfh() && Subtarget.hasStdExtZfa() ? Legal : Promote);
496 setOperationAction({ISD::FREM, ISD::FPOW, ISD::FPOWI,
497 ISD::FCOS, ISD::FSIN, ISD::FSINCOS, ISD::FEXP,
498 ISD::FEXP2, ISD::FEXP10, ISD::FLOG, ISD::FLOG2,
499 ISD::FLOG10},
500 MVT::f16, Promote);
501
502 // FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have
503 // complete support for all operations in LegalizeDAG.
508 MVT::f16, Promote);
509
510 // We need to custom promote this.
511 if (Subtarget.is64Bit())
512 setOperationAction(ISD::FPOWI, MVT::i32, Custom);
513 }
514
515 if (Subtarget.hasStdExtFOrZfinx()) {
516 setOperationAction(FPLegalNodeTypes, MVT::f32, Legal);
517 setOperationAction(FPRndMode, MVT::f32,
518 Subtarget.hasStdExtZfa() ? Legal : Custom);
519 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
522 setOperationAction(ISD::BR_CC, MVT::f32, Expand);
523 setOperationAction(FPOpToExpand, MVT::f32, Expand);
524 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
525 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
526 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
527 setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
529 setOperationAction(ISD::BF16_TO_FP, MVT::f32, Custom);
530 setOperationAction(ISD::FP_TO_BF16, MVT::f32,
531 Subtarget.isSoftFPABI() ? LibCall : Custom);
532 setOperationAction(ISD::FP_TO_FP16, MVT::f32, Custom);
533 setOperationAction(ISD::FP16_TO_FP, MVT::f32, Custom);
534 setOperationAction(ISD::STRICT_FP_TO_FP16, MVT::f32, Custom);
535 setOperationAction(ISD::STRICT_FP16_TO_FP, MVT::f32, Custom);
536
537 if (Subtarget.hasStdExtZfa()) {
539 setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
540 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f32, Legal);
541 } else {
542 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f32, Custom);
543 }
544 }
545
546 if (Subtarget.hasStdExtFOrZfinx() && Subtarget.is64Bit())
547 setOperationAction(ISD::BITCAST, MVT::i32, Custom);
548
549 if (Subtarget.hasStdExtDOrZdinx()) {
550 setOperationAction(FPLegalNodeTypes, MVT::f64, Legal);
551
552 if (!Subtarget.is64Bit())
553 setOperationAction(ISD::BITCAST, MVT::i64, Custom);
554
555 if (Subtarget.hasStdExtZfa()) {
557 setOperationAction(FPRndMode, MVT::f64, Legal);
558 setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
559 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f64, Legal);
560 } else {
561 if (Subtarget.is64Bit())
562 setOperationAction(FPRndMode, MVT::f64, Custom);
563
564 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f64, Custom);
565 }
566
569 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
572 setOperationAction(ISD::BR_CC, MVT::f64, Expand);
573 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
574 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
575 setOperationAction(FPOpToExpand, MVT::f64, Expand);
576 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
577 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
578 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
579 setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
581 setOperationAction(ISD::BF16_TO_FP, MVT::f64, Custom);
582 setOperationAction(ISD::FP_TO_BF16, MVT::f64,
583 Subtarget.isSoftFPABI() ? LibCall : Custom);
584 setOperationAction(ISD::FP_TO_FP16, MVT::f64, Custom);
585 setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
586 setOperationAction(ISD::STRICT_FP_TO_FP16, MVT::f64, Custom);
587 setOperationAction(ISD::STRICT_FP16_TO_FP, MVT::f64, Expand);
588 }
589
590 if (Subtarget.is64Bit()) {
593 MVT::i32, Custom);
594 setOperationAction(ISD::LROUND, MVT::i32, Custom);
595 }
596
597 if (Subtarget.hasStdExtFOrZfinx()) {
599 Custom);
600
601 // f16/bf16 require custom handling.
603 Custom);
605 Custom);
606
608 setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
609 }
610
613 XLenVT, Custom);
614
616
617 if (Subtarget.is64Bit())
619
620 // TODO: On M-mode only targets, the cycle[h]/time[h] CSR may not be present.
621 // Unfortunately this can't be determined just from the ISA naming string.
622 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64,
623 Subtarget.is64Bit() ? Legal : Custom);
624 setOperationAction(ISD::READSTEADYCOUNTER, MVT::i64,
625 Subtarget.is64Bit() ? Legal : Custom);
626
627 setOperationAction({ISD::TRAP, ISD::DEBUGTRAP}, MVT::Other, Legal);
629 if (Subtarget.is64Bit())
631
632 if (Subtarget.hasStdExtZicbop()) {
633 setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
634 }
635
636 if (Subtarget.hasStdExtA()) {
638 if (Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas())
640 else
642 } else if (Subtarget.hasForcedAtomics()) {
644 } else {
646 }
647
648 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
649
651
652 if (getTargetMachine().getTargetTriple().isOSLinux()) {
653 // Custom lowering of llvm.clear_cache.
655 }
656
657 if (Subtarget.hasVInstructions()) {
659
660 setOperationAction(ISD::VSCALE, XLenVT, Custom);
661
662 // RVV intrinsics may have illegal operands.
663 // We also need to custom legalize vmv.x.s.
666 {MVT::i8, MVT::i16}, Custom);
667 if (Subtarget.is64Bit())
669 MVT::i32, Custom);
670 else
672 MVT::i64, Custom);
673
675 MVT::Other, Custom);
676
677 static const unsigned IntegerVPOps[] = {
678 ISD::VP_ADD, ISD::VP_SUB, ISD::VP_MUL,
679 ISD::VP_SDIV, ISD::VP_UDIV, ISD::VP_SREM,
680 ISD::VP_UREM, ISD::VP_AND, ISD::VP_OR,
681 ISD::VP_XOR, ISD::VP_SRA, ISD::VP_SRL,
682 ISD::VP_SHL, ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
683 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR, ISD::VP_REDUCE_SMAX,
684 ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN,
685 ISD::VP_MERGE, ISD::VP_SELECT, ISD::VP_FP_TO_SINT,
686 ISD::VP_FP_TO_UINT, ISD::VP_SETCC, ISD::VP_SIGN_EXTEND,
687 ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE, ISD::VP_SMIN,
688 ISD::VP_SMAX, ISD::VP_UMIN, ISD::VP_UMAX,
689 ISD::VP_ABS, ISD::EXPERIMENTAL_VP_REVERSE, ISD::EXPERIMENTAL_VP_SPLICE,
690 ISD::VP_SADDSAT, ISD::VP_UADDSAT, ISD::VP_SSUBSAT,
691 ISD::VP_USUBSAT, ISD::VP_CTTZ_ELTS, ISD::VP_CTTZ_ELTS_ZERO_UNDEF,
692 ISD::EXPERIMENTAL_VP_SPLAT};
693
694 static const unsigned FloatingPointVPOps[] = {
695 ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
696 ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS,
697 ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
698 ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_MERGE,
699 ISD::VP_SELECT, ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP,
700 ISD::VP_SETCC, ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND,
701 ISD::VP_SQRT, ISD::VP_FMINNUM, ISD::VP_FMAXNUM,
702 ISD::VP_FCEIL, ISD::VP_FFLOOR, ISD::VP_FROUND,
703 ISD::VP_FROUNDEVEN, ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO,
704 ISD::VP_FRINT, ISD::VP_FNEARBYINT, ISD::VP_IS_FPCLASS,
705 ISD::VP_FMINIMUM, ISD::VP_FMAXIMUM, ISD::VP_LRINT,
706 ISD::VP_LLRINT, ISD::EXPERIMENTAL_VP_REVERSE,
707 ISD::EXPERIMENTAL_VP_SPLICE, ISD::VP_REDUCE_FMINIMUM,
708 ISD::VP_REDUCE_FMAXIMUM, ISD::EXPERIMENTAL_VP_SPLAT};
709
710 static const unsigned IntegerVecReduceOps[] = {
711 ISD::VECREDUCE_ADD, ISD::VECREDUCE_AND, ISD::VECREDUCE_OR,
712 ISD::VECREDUCE_XOR, ISD::VECREDUCE_SMAX, ISD::VECREDUCE_SMIN,
713 ISD::VECREDUCE_UMAX, ISD::VECREDUCE_UMIN};
714
715 static const unsigned FloatingPointVecReduceOps[] = {
716 ISD::VECREDUCE_FADD, ISD::VECREDUCE_SEQ_FADD, ISD::VECREDUCE_FMIN,
717 ISD::VECREDUCE_FMAX, ISD::VECREDUCE_FMINIMUM, ISD::VECREDUCE_FMAXIMUM};
718
719 if (!Subtarget.is64Bit()) {
720 // We must custom-lower certain vXi64 operations on RV32 due to the vector
721 // element type being illegal.
723 MVT::i64, Custom);
724
725 setOperationAction(IntegerVecReduceOps, MVT::i64, Custom);
726
727 setOperationAction({ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
728 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR,
729 ISD::VP_REDUCE_SMAX, ISD::VP_REDUCE_SMIN,
730 ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN},
731 MVT::i64, Custom);
732 }
733
734 for (MVT VT : BoolVecVTs) {
735 if (!isTypeLegal(VT))
736 continue;
737
739
740 // Mask VTs are custom-expanded into a series of standard nodes
744 VT, Custom);
745
747 Custom);
748
751 {ISD::SELECT_CC, ISD::VSELECT, ISD::VP_MERGE, ISD::VP_SELECT}, VT,
752 Expand);
753
754 setOperationAction({ISD::VP_CTTZ_ELTS, ISD::VP_CTTZ_ELTS_ZERO_UNDEF}, VT,
755 Custom);
756
757 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR}, VT, Custom);
758
760 {ISD::VECREDUCE_AND, ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR}, VT,
761 Custom);
762
764 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
765 Custom);
766
767 // RVV has native int->float & float->int conversions where the
768 // element type sizes are within one power-of-two of each other. Any
769 // wider distances between type sizes have to be lowered as sequences
770 // which progressively narrow the gap in stages.
775 VT, Custom);
777 Custom);
778
779 // Expand all extending loads to types larger than this, and truncating
780 // stores from types larger than this.
782 setTruncStoreAction(VT, OtherVT, Expand);
784 OtherVT, Expand);
785 }
786
787 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
788 ISD::VP_TRUNCATE, ISD::VP_SETCC},
789 VT, Custom);
790
793
795
796 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
797 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
798
801 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount()));
802 }
803
804 for (MVT VT : IntVecVTs) {
805 if (!isTypeLegal(VT))
806 continue;
807
810
811 // Vectors implement MULHS/MULHU.
813
814 // nxvXi64 MULHS/MULHU requires the V extension instead of Zve64*.
815 if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV())
817
819 Legal);
820
822
823 // Custom-lower extensions and truncations from/to mask types.
825 VT, Custom);
826
827 // RVV has native int->float & float->int conversions where the
828 // element type sizes are within one power-of-two of each other. Any
829 // wider distances between type sizes have to be lowered as sequences
830 // which progressively narrow the gap in stages.
835 VT, Custom);
837 Custom);
841 VT, Legal);
842
843 // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
844 // nodes which truncate by one power of two at a time.
847 Custom);
848
849 // Custom-lower insert/extract operations to simplify patterns.
851 Custom);
852
853 // Custom-lower reduction operations to set up the corresponding custom
854 // nodes' operands.
855 setOperationAction(IntegerVecReduceOps, VT, Custom);
856
857 setOperationAction(IntegerVPOps, VT, Custom);
858
859 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
860
861 setOperationAction({ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER},
862 VT, Custom);
863
865 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
866 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
867 VT, Custom);
868
871 VT, Custom);
872
875
877
879 setTruncStoreAction(VT, OtherVT, Expand);
881 OtherVT, Expand);
882 }
883
886
887 // Splice
889
890 if (Subtarget.hasStdExtZvkb()) {
892 setOperationAction(ISD::VP_BSWAP, VT, Custom);
893 } else {
894 setOperationAction({ISD::BSWAP, ISD::VP_BSWAP}, VT, Expand);
896 }
897
898 if (Subtarget.hasStdExtZvbb()) {
900 setOperationAction(ISD::VP_BITREVERSE, VT, Custom);
901 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
902 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
903 VT, Custom);
904 } else {
905 setOperationAction({ISD::BITREVERSE, ISD::VP_BITREVERSE}, VT, Expand);
907 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
908 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
909 VT, Expand);
910
911 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if element of VT in the
912 // range of f32.
913 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
914 if (isTypeLegal(FloatVT)) {
916 ISD::CTTZ_ZERO_UNDEF, ISD::VP_CTLZ,
917 ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ_ZERO_UNDEF},
918 VT, Custom);
919 }
920 }
921 }
922
923 for (MVT VT : VecTupleVTs) {
924 if (!isTypeLegal(VT))
925 continue;
926
927 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
928 }
929
930 // Expand various CCs to best match the RVV ISA, which natively supports UNE
931 // but no other unordered comparisons, and supports all ordered comparisons
932 // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
933 // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
934 // and we pattern-match those back to the "original", swapping operands once
935 // more. This way we catch both operations and both "vf" and "fv" forms with
936 // fewer patterns.
937 static const ISD::CondCode VFPCCToExpand[] = {
941 };
942
943 // TODO: support more ops.
944 static const unsigned ZvfhminZvfbfminPromoteOps[] = {
945 ISD::FMINNUM, ISD::FMAXNUM, ISD::FADD, ISD::FSUB,
946 ISD::FMUL, ISD::FMA, ISD::FDIV, ISD::FSQRT,
947 ISD::FCEIL, ISD::FTRUNC, ISD::FFLOOR, ISD::FROUND,
948 ISD::FROUNDEVEN, ISD::FRINT, ISD::FNEARBYINT, ISD::IS_FPCLASS,
949 ISD::SETCC, ISD::FMAXIMUM, ISD::FMINIMUM, ISD::STRICT_FADD,
952
953 // TODO: support more vp ops.
954 static const unsigned ZvfhminZvfbfminPromoteVPOps[] = {
955 ISD::VP_FADD,
956 ISD::VP_FSUB,
957 ISD::VP_FMUL,
958 ISD::VP_FDIV,
959 ISD::VP_FMA,
960 ISD::VP_REDUCE_FADD,
961 ISD::VP_REDUCE_SEQ_FADD,
962 ISD::VP_REDUCE_FMIN,
963 ISD::VP_REDUCE_FMAX,
964 ISD::VP_SQRT,
965 ISD::VP_FMINNUM,
966 ISD::VP_FMAXNUM,
967 ISD::VP_FCEIL,
968 ISD::VP_FFLOOR,
969 ISD::VP_FROUND,
970 ISD::VP_FROUNDEVEN,
971 ISD::VP_FROUNDTOZERO,
972 ISD::VP_FRINT,
973 ISD::VP_FNEARBYINT,
974 ISD::VP_SETCC,
975 ISD::VP_FMINIMUM,
976 ISD::VP_FMAXIMUM,
977 ISD::VP_REDUCE_FMINIMUM,
978 ISD::VP_REDUCE_FMAXIMUM};
979
980 // Sets common operation actions on RVV floating-point vector types.
981 const auto SetCommonVFPActions = [&](MVT VT) {
983 // RVV has native FP_ROUND & FP_EXTEND conversions where the element type
984 // sizes are within one power-of-two of each other. Therefore conversions
985 // between vXf16 and vXf64 must be lowered as sequences which convert via
986 // vXf32.
987 setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
988 setOperationAction({ISD::LRINT, ISD::LLRINT}, VT, Custom);
989 // Custom-lower insert/extract operations to simplify patterns.
991 Custom);
992 // Expand various condition codes (explained above).
993 setCondCodeAction(VFPCCToExpand, VT, Expand);
994
995 setOperationAction({ISD::FMINNUM, ISD::FMAXNUM}, VT, Legal);
996 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, VT, Custom);
997
998 setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND,
999 ISD::FROUNDEVEN, ISD::FRINT, ISD::FNEARBYINT,
1001 VT, Custom);
1002
1003 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
1004
1005 // Expand FP operations that need libcalls.
1007 setOperationAction(ISD::FPOW, VT, Expand);
1008 setOperationAction(ISD::FCOS, VT, Expand);
1009 setOperationAction(ISD::FSIN, VT, Expand);
1010 setOperationAction(ISD::FSINCOS, VT, Expand);
1011 setOperationAction(ISD::FEXP, VT, Expand);
1012 setOperationAction(ISD::FEXP2, VT, Expand);
1013 setOperationAction(ISD::FEXP10, VT, Expand);
1014 setOperationAction(ISD::FLOG, VT, Expand);
1015 setOperationAction(ISD::FLOG2, VT, Expand);
1016 setOperationAction(ISD::FLOG10, VT, Expand);
1017
1019
1020 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
1021
1022 setOperationAction({ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER},
1023 VT, Custom);
1024
1026 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1027 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
1028 VT, Custom);
1029
1032
1035 VT, Custom);
1036
1039
1041
1042 setOperationAction(FloatingPointVPOps, VT, Custom);
1043
1045 Custom);
1048 VT, Legal);
1053 VT, Custom);
1054 };
1055
1056 // Sets common extload/truncstore actions on RVV floating-point vector
1057 // types.
1058 const auto SetCommonVFPExtLoadTruncStoreActions =
1059 [&](MVT VT, ArrayRef<MVT::SimpleValueType> SmallerVTs) {
1060 for (auto SmallVT : SmallerVTs) {
1061 setTruncStoreAction(VT, SmallVT, Expand);
1062 setLoadExtAction(ISD::EXTLOAD, VT, SmallVT, Expand);
1063 }
1064 };
1065
1066 // Sets common actions for f16 and bf16 for when there's only
1067 // zvfhmin/zvfbfmin and we need to promote to f32 for most operations.
1068 const auto SetCommonPromoteToF32Actions = [&](MVT VT) {
1069 setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
1071 Custom);
1072 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1073 setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT,
1074 Custom);
1077 ISD::VP_UINT_TO_FP},
1078 VT, Custom);
1082 VT, Custom);
1083 MVT EltVT = VT.getVectorElementType();
1084 if (isTypeLegal(EltVT))
1085 setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT,
1087 VT, Custom);
1088 else
1089 setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT},
1090 EltVT, Custom);
1091 setOperationAction({ISD::LOAD, ISD::STORE, ISD::MLOAD, ISD::MSTORE,
1092 ISD::MGATHER, ISD::MSCATTER, ISD::VP_LOAD,
1093 ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1094 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1095 ISD::VP_SCATTER},
1096 VT, Custom);
1097
1098 setOperationAction(ISD::FNEG, VT, Expand);
1099 setOperationAction(ISD::FABS, VT, Expand);
1101
1102 // Custom split nxv32[b]f16 since nxv32[b]f32 is not legal.
1103 if (getLMUL(VT) == RISCVII::VLMUL::LMUL_8) {
1104 setOperationAction(ZvfhminZvfbfminPromoteOps, VT, Custom);
1105 setOperationAction(ZvfhminZvfbfminPromoteVPOps, VT, Custom);
1106 } else {
1107 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1108 setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
1109 setOperationPromotedToType(ZvfhminZvfbfminPromoteVPOps, VT, F32VecVT);
1110 }
1111 };
1112
1113 if (Subtarget.hasVInstructionsF16()) {
1114 for (MVT VT : F16VecVTs) {
1115 if (!isTypeLegal(VT))
1116 continue;
1117 SetCommonVFPActions(VT);
1118 }
1119 } else if (Subtarget.hasVInstructionsF16Minimal()) {
1120 for (MVT VT : F16VecVTs) {
1121 if (!isTypeLegal(VT))
1122 continue;
1123 SetCommonPromoteToF32Actions(VT);
1124 }
1125 }
1126
1127 if (Subtarget.hasVInstructionsBF16Minimal()) {
1128 for (MVT VT : BF16VecVTs) {
1129 if (!isTypeLegal(VT))
1130 continue;
1131 SetCommonPromoteToF32Actions(VT);
1132 }
1133 }
1134
1135 if (Subtarget.hasVInstructionsF32()) {
1136 for (MVT VT : F32VecVTs) {
1137 if (!isTypeLegal(VT))
1138 continue;
1139 SetCommonVFPActions(VT);
1140 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1141 SetCommonVFPExtLoadTruncStoreActions(VT, BF16VecVTs);
1142 }
1143 }
1144
1145 if (Subtarget.hasVInstructionsF64()) {
1146 for (MVT VT : F64VecVTs) {
1147 if (!isTypeLegal(VT))
1148 continue;
1149 SetCommonVFPActions(VT);
1150 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1151 SetCommonVFPExtLoadTruncStoreActions(VT, BF16VecVTs);
1152 SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
1153 }
1154 }
1155
1156 if (Subtarget.useRVVForFixedLengthVectors()) {
1158 if (!useRVVForFixedLengthVectorVT(VT))
1159 continue;
1160
1161 // By default everything must be expanded.
1162 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1165 setTruncStoreAction(VT, OtherVT, Expand);
1167 OtherVT, Expand);
1168 }
1169
1170 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1171 // expansion to a build_vector of 0s.
1173
1174 // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
1176 Custom);
1177
1180 Custom);
1181
1183 VT, Custom);
1184
1186
1187 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
1188
1190
1192
1195 Custom);
1196
1197 setOperationAction(ISD::BITCAST, VT, Custom);
1198
1200 {ISD::VECREDUCE_AND, ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR}, VT,
1201 Custom);
1202
1204 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
1205 Custom);
1206
1208 {
1217 },
1218 VT, Custom);
1220 Custom);
1221
1223
1224 // Operations below are different for between masks and other vectors.
1225 if (VT.getVectorElementType() == MVT::i1) {
1226 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR, ISD::AND,
1227 ISD::OR, ISD::XOR},
1228 VT, Custom);
1229
1230 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
1231 ISD::VP_SETCC, ISD::VP_TRUNCATE},
1232 VT, Custom);
1233
1234 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
1235 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
1236 continue;
1237 }
1238
1239 // Make SPLAT_VECTOR Legal so DAGCombine will convert splat vectors to
1240 // it before type legalization for i64 vectors on RV32. It will then be
1241 // type legalized to SPLAT_VECTOR_PARTS which we need to Custom handle.
1242 // FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs
1243 // improvements first.
1244 if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) {
1247 }
1248
1250 {ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER}, VT, Custom);
1251
1252 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
1253 ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1254 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1255 ISD::VP_SCATTER},
1256 VT, Custom);
1257
1261 VT, Custom);
1262
1265
1267
1268 // vXi64 MULHS/MULHU requires the V extension instead of Zve64*.
1269 if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV())
1271
1275 VT, Custom);
1276
1278
1281
1282 // Custom-lower reduction operations to set up the corresponding custom
1283 // nodes' operands.
1284 setOperationAction({ISD::VECREDUCE_ADD, ISD::VECREDUCE_SMAX,
1285 ISD::VECREDUCE_SMIN, ISD::VECREDUCE_UMAX,
1286 ISD::VECREDUCE_UMIN},
1287 VT, Custom);
1288
1289 setOperationAction(IntegerVPOps, VT, Custom);
1290
1291 if (Subtarget.hasStdExtZvkb())
1293
1294 if (Subtarget.hasStdExtZvbb()) {
1297 VT, Custom);
1298 } else {
1299 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if element of VT in the
1300 // range of f32.
1301 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1302 if (isTypeLegal(FloatVT))
1305 Custom);
1306 }
1307 }
1308
1310 // There are no extending loads or truncating stores.
1311 for (MVT InnerVT : MVT::fp_fixedlen_vector_valuetypes()) {
1312 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
1313 setTruncStoreAction(VT, InnerVT, Expand);
1314 }
1315
1316 if (!useRVVForFixedLengthVectorVT(VT))
1317 continue;
1318
1319 // By default everything must be expanded.
1320 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1322
1323 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1324 // expansion to a build_vector of 0s.
1326
1329 VT, Custom);
1330
1331 // FIXME: mload, mstore, mgather, mscatter, vp_load/store,
1332 // vp_stride_load/store, vp_gather/scatter can be hoisted to here.
1333 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
1334 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE}, VT, Custom);
1335
1336 setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
1338 Custom);
1339
1340 if (VT.getVectorElementType() == MVT::f16 &&
1341 !Subtarget.hasVInstructionsF16()) {
1342 setOperationAction(ISD::BITCAST, VT, Custom);
1343 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1345 {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
1346 Custom);
1348 ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP},
1349 VT, Custom);
1351 if (Subtarget.hasStdExtZfhmin()) {
1353 } else {
1354 // We need to custom legalize f16 build vectors if Zfhmin isn't
1355 // available.
1357 }
1358 setOperationAction(ISD::FNEG, VT, Expand);
1359 setOperationAction(ISD::FABS, VT, Expand);
1361 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1362 // Don't promote f16 vector operations to f32 if f32 vector type is
1363 // not legal.
1364 // TODO: could split the f16 vector into two vectors and do promotion.
1365 if (!isTypeLegal(F32VecVT))
1366 continue;
1367 setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
1368 setOperationPromotedToType(ZvfhminZvfbfminPromoteVPOps, VT, F32VecVT);
1369 continue;
1370 }
1371
1372 if (VT.getVectorElementType() == MVT::bf16) {
1373 setOperationAction(ISD::BITCAST, VT, Custom);
1374 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1375 if (Subtarget.hasStdExtZfbfmin()) {
1377 } else {
1378 // We need to custom legalize bf16 build vectors if Zfbfmin isn't
1379 // available.
1381 }
1383 {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
1384 Custom);
1385 // TODO: Promote to fp32.
1386 continue;
1387 }
1388
1391 VT, Custom);
1392
1394 {ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER}, VT, Custom);
1395
1396 setOperationAction({ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1397 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1398 ISD::VP_SCATTER},
1399 VT, Custom);
1400
1402 ISD::FNEG, ISD::FABS, ISD::FCOPYSIGN, ISD::FSQRT,
1403 ISD::FMA, ISD::FMINNUM, ISD::FMAXNUM,
1404 ISD::IS_FPCLASS, ISD::FMAXIMUM, ISD::FMINIMUM},
1405 VT, Custom);
1406
1407 setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND,
1408 ISD::FROUNDEVEN, ISD::FRINT, ISD::FNEARBYINT},
1409 VT, Custom);
1410
1411 setCondCodeAction(VFPCCToExpand, VT, Expand);
1412
1415
1416 setOperationAction(ISD::BITCAST, VT, Custom);
1417
1418 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
1419
1420 setOperationAction(FloatingPointVPOps, VT, Custom);
1421
1428 VT, Custom);
1429 }
1430
1431 // Custom-legalize bitcasts from fixed-length vectors to scalar types.
1432 setOperationAction(ISD::BITCAST, {MVT::i8, MVT::i16, MVT::i32, MVT::i64},
1433 Custom);
1434 if (Subtarget.hasStdExtZfhminOrZhinxmin())
1435 setOperationAction(ISD::BITCAST, MVT::f16, Custom);
1436 if (Subtarget.hasStdExtFOrZfinx())
1437 setOperationAction(ISD::BITCAST, MVT::f32, Custom);
1438 if (Subtarget.hasStdExtDOrZdinx())
1439 setOperationAction(ISD::BITCAST, MVT::f64, Custom);
1440 }
1441 }
1442
1443 if (Subtarget.hasStdExtA())
1444 setOperationAction(ISD::ATOMIC_LOAD_SUB, XLenVT, Expand);
1445
1446 if (Subtarget.hasForcedAtomics()) {
1447 // Force __sync libcalls to be emitted for atomic rmw/cas operations.
1449 {ISD::ATOMIC_CMP_SWAP, ISD::ATOMIC_SWAP, ISD::ATOMIC_LOAD_ADD,
1450 ISD::ATOMIC_LOAD_SUB, ISD::ATOMIC_LOAD_AND, ISD::ATOMIC_LOAD_OR,
1451 ISD::ATOMIC_LOAD_XOR, ISD::ATOMIC_LOAD_NAND, ISD::ATOMIC_LOAD_MIN,
1452 ISD::ATOMIC_LOAD_MAX, ISD::ATOMIC_LOAD_UMIN, ISD::ATOMIC_LOAD_UMAX},
1453 XLenVT, LibCall);
1454 }
1455
1456 if (Subtarget.hasVendorXTHeadMemIdx()) {
1457 for (unsigned im : {ISD::PRE_INC, ISD::POST_INC}) {
1458 setIndexedLoadAction(im, MVT::i8, Legal);
1459 setIndexedStoreAction(im, MVT::i8, Legal);
1460 setIndexedLoadAction(im, MVT::i16, Legal);
1461 setIndexedStoreAction(im, MVT::i16, Legal);
1462 setIndexedLoadAction(im, MVT::i32, Legal);
1463 setIndexedStoreAction(im, MVT::i32, Legal);
1464
1465 if (Subtarget.is64Bit()) {
1466 setIndexedLoadAction(im, MVT::i64, Legal);
1467 setIndexedStoreAction(im, MVT::i64, Legal);
1468 }
1469 }
1470 }
1471
1472 if (Subtarget.hasVendorXCVmem() && !Subtarget.is64Bit()) {
1476
1480 }
1481
1482 // Function alignments.
1483 const Align FunctionAlignment(Subtarget.hasStdExtCOrZca() ? 2 : 4);
1484 setMinFunctionAlignment(FunctionAlignment);
1485 // Set preferred alignments.
1488
1493
1494 if (Subtarget.hasStdExtFOrZfinx())
1495 setTargetDAGCombine({ISD::FADD, ISD::FMAXNUM, ISD::FMINNUM, ISD::FMUL});
1496
1497 if (Subtarget.hasStdExtZbb())
1499
1500 if ((Subtarget.hasStdExtZbs() && Subtarget.is64Bit()) ||
1501 Subtarget.hasVInstructions())
1503
1504 if (Subtarget.hasStdExtZbkb())
1506 if (Subtarget.hasStdExtZfhminOrZhinxmin())
1508 if (Subtarget.hasStdExtFOrZfinx())
1511 if (Subtarget.hasVInstructions())
1512 setTargetDAGCombine({ISD::FCOPYSIGN, ISD::MGATHER, ISD::MSCATTER,
1513 ISD::VP_GATHER, ISD::VP_SCATTER, ISD::SRA, ISD::SRL,
1514 ISD::SHL, ISD::STORE, ISD::SPLAT_VECTOR,
1516 ISD::EXPERIMENTAL_VP_REVERSE, ISD::MUL,
1519 if (Subtarget.hasVendorXTHeadMemPair())
1520 setTargetDAGCombine({ISD::LOAD, ISD::STORE});
1521 if (Subtarget.useRVVForFixedLengthVectors())
1522 setTargetDAGCombine(ISD::BITCAST);
1523
1524 setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
1525 setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");
1526
1527 // Disable strict node mutation.
1528 IsStrictFPEnabled = true;
1529 EnableExtLdPromotion = true;
1530
1531 // Let the subtarget decide if a predictable select is more expensive than the
1532 // corresponding branch. This information is used in CGP/SelectOpt to decide
1533 // when to convert selects into branches.
1534 PredictableSelectIsExpensive = Subtarget.predictableSelectIsExpensive();
1535}
1536
1538 LLVMContext &Context,
1539 EVT VT) const {
1540 if (!VT.isVector())
1541 return getPointerTy(DL);
1542 if (Subtarget.hasVInstructions() &&
1543 (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
1544 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
1546}
1547
// Return the type used for the explicit vector length (EVL) operand of VP
// intrinsics: an XLen-sized integer on this target.
MVT RISCVTargetLowering::getVPExplicitVectorLengthTy() const {
  return Subtarget.getXLenVT();
}
1551
1552// Return false if we can lower get_vector_length to a vsetvli intrinsic.
1553bool RISCVTargetLowering::shouldExpandGetVectorLength(EVT TripCountVT,
1554 unsigned VF,
1555 bool IsScalable) const {
1556 if (!Subtarget.hasVInstructions())
1557 return true;
1558
1559 if (!IsScalable)
1560 return true;
1561
1562 if (TripCountVT != MVT::i32 && TripCountVT != Subtarget.getXLenVT())
1563 return true;
1564
1565 // Don't allow VF=1 if those types are't legal.
1566 if (VF < RISCV::RVVBitsPerBlock / Subtarget.getELen())
1567 return true;
1568
1569 // VLEN=32 support is incomplete.
1570 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
1571 return true;
1572
1573 // The maximum VF is for the smallest element width with LMUL=8.
1574 // VF must be a power of 2.
1575 unsigned MaxVF = (RISCV::RVVBitsPerBlock / 8) * 8;
1576 return VF > MaxVF || !isPowerOf2_32(VF);
1577}
1578
1580 return !Subtarget.hasVInstructions() ||
1581 VT.getVectorElementType() != MVT::i1 || !isTypeLegal(VT);
1582}
1583
1585 const CallInst &I,
1586 MachineFunction &MF,
1587 unsigned Intrinsic) const {
1588 auto &DL = I.getDataLayout();
1589
1590 auto SetRVVLoadStoreInfo = [&](unsigned PtrOp, bool IsStore,
1591 bool IsUnitStrided, bool UsePtrVal = false) {
1592 Info.opc = IsStore ? ISD::INTRINSIC_VOID : ISD::INTRINSIC_W_CHAIN;
1593 // We can't use ptrVal if the intrinsic can access memory before the
1594 // pointer. This means we can't use it for strided or indexed intrinsics.
1595 if (UsePtrVal)
1596 Info.ptrVal = I.getArgOperand(PtrOp);
1597 else
1598 Info.fallbackAddressSpace =
1599 I.getArgOperand(PtrOp)->getType()->getPointerAddressSpace();
1600 Type *MemTy;
1601 if (IsStore) {
1602 // Store value is the first operand.
1603 MemTy = I.getArgOperand(0)->getType();
1604 } else {
1605 // Use return type. If it's segment load, return type is a struct.
1606 MemTy = I.getType();
1607 if (MemTy->isStructTy())
1608 MemTy = MemTy->getStructElementType(0);
1609 }
1610 if (!IsUnitStrided)
1611 MemTy = MemTy->getScalarType();
1612
1613 Info.memVT = getValueType(DL, MemTy);
1614 if (MemTy->isTargetExtTy())
1615 Info.align = DL.getABITypeAlign(MemTy);
1616 else
1617 Info.align = Align(DL.getTypeSizeInBits(MemTy->getScalarType()) / 8);
1618 Info.size = MemoryLocation::UnknownSize;
1619 Info.flags |=
1621 return true;
1622 };
1623
1624 if (I.hasMetadata(LLVMContext::MD_nontemporal))
1626
1628 switch (Intrinsic) {
1629 default:
1630 return false;
1631 case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
1632 case Intrinsic::riscv_masked_atomicrmw_add_i32:
1633 case Intrinsic::riscv_masked_atomicrmw_sub_i32:
1634 case Intrinsic::riscv_masked_atomicrmw_nand_i32:
1635 case Intrinsic::riscv_masked_atomicrmw_max_i32:
1636 case Intrinsic::riscv_masked_atomicrmw_min_i32:
1637 case Intrinsic::riscv_masked_atomicrmw_umax_i32:
1638 case Intrinsic::riscv_masked_atomicrmw_umin_i32:
1639 case Intrinsic::riscv_masked_cmpxchg_i32:
1640 Info.opc = ISD::INTRINSIC_W_CHAIN;
1641 Info.memVT = MVT::i32;
1642 Info.ptrVal = I.getArgOperand(0);
1643 Info.offset = 0;
1644 Info.align = Align(4);
1647 return true;
1648 case Intrinsic::riscv_seg2_load:
1649 case Intrinsic::riscv_seg3_load:
1650 case Intrinsic::riscv_seg4_load:
1651 case Intrinsic::riscv_seg5_load:
1652 case Intrinsic::riscv_seg6_load:
1653 case Intrinsic::riscv_seg7_load:
1654 case Intrinsic::riscv_seg8_load:
1655 return SetRVVLoadStoreInfo(/*PtrOp*/ 0, /*IsStore*/ false,
1656 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1657 case Intrinsic::riscv_seg2_store:
1658 case Intrinsic::riscv_seg3_store:
1659 case Intrinsic::riscv_seg4_store:
1660 case Intrinsic::riscv_seg5_store:
1661 case Intrinsic::riscv_seg6_store:
1662 case Intrinsic::riscv_seg7_store:
1663 case Intrinsic::riscv_seg8_store:
1664 // Operands are (vec, ..., vec, ptr, vl)
1665 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
1666 /*IsStore*/ true,
1667 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1668 case Intrinsic::riscv_vle:
1669 case Intrinsic::riscv_vle_mask:
1670 case Intrinsic::riscv_vleff:
1671 case Intrinsic::riscv_vleff_mask:
1672 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1673 /*IsStore*/ false,
1674 /*IsUnitStrided*/ true,
1675 /*UsePtrVal*/ true);
1676 case Intrinsic::riscv_vse:
1677 case Intrinsic::riscv_vse_mask:
1678 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1679 /*IsStore*/ true,
1680 /*IsUnitStrided*/ true,
1681 /*UsePtrVal*/ true);
1682 case Intrinsic::riscv_vlse:
1683 case Intrinsic::riscv_vlse_mask:
1684 case Intrinsic::riscv_vloxei:
1685 case Intrinsic::riscv_vloxei_mask:
1686 case Intrinsic::riscv_vluxei:
1687 case Intrinsic::riscv_vluxei_mask:
1688 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1689 /*IsStore*/ false,
1690 /*IsUnitStrided*/ false);
1691 case Intrinsic::riscv_vsse:
1692 case Intrinsic::riscv_vsse_mask:
1693 case Intrinsic::riscv_vsoxei:
1694 case Intrinsic::riscv_vsoxei_mask:
1695 case Intrinsic::riscv_vsuxei:
1696 case Intrinsic::riscv_vsuxei_mask:
1697 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1698 /*IsStore*/ true,
1699 /*IsUnitStrided*/ false);
1700 case Intrinsic::riscv_vlseg2:
1701 case Intrinsic::riscv_vlseg3:
1702 case Intrinsic::riscv_vlseg4:
1703 case Intrinsic::riscv_vlseg5:
1704 case Intrinsic::riscv_vlseg6:
1705 case Intrinsic::riscv_vlseg7:
1706 case Intrinsic::riscv_vlseg8:
1707 case Intrinsic::riscv_vlseg2ff:
1708 case Intrinsic::riscv_vlseg3ff:
1709 case Intrinsic::riscv_vlseg4ff:
1710 case Intrinsic::riscv_vlseg5ff:
1711 case Intrinsic::riscv_vlseg6ff:
1712 case Intrinsic::riscv_vlseg7ff:
1713 case Intrinsic::riscv_vlseg8ff:
1714 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1715 /*IsStore*/ false,
1716 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1717 case Intrinsic::riscv_vlseg2_mask:
1718 case Intrinsic::riscv_vlseg3_mask:
1719 case Intrinsic::riscv_vlseg4_mask:
1720 case Intrinsic::riscv_vlseg5_mask:
1721 case Intrinsic::riscv_vlseg6_mask:
1722 case Intrinsic::riscv_vlseg7_mask:
1723 case Intrinsic::riscv_vlseg8_mask:
1724 case Intrinsic::riscv_vlseg2ff_mask:
1725 case Intrinsic::riscv_vlseg3ff_mask:
1726 case Intrinsic::riscv_vlseg4ff_mask:
1727 case Intrinsic::riscv_vlseg5ff_mask:
1728 case Intrinsic::riscv_vlseg6ff_mask:
1729 case Intrinsic::riscv_vlseg7ff_mask:
1730 case Intrinsic::riscv_vlseg8ff_mask:
1731 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5,
1732 /*IsStore*/ false,
1733 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1734 case Intrinsic::riscv_vlsseg2:
1735 case Intrinsic::riscv_vlsseg3:
1736 case Intrinsic::riscv_vlsseg4:
1737 case Intrinsic::riscv_vlsseg5:
1738 case Intrinsic::riscv_vlsseg6:
1739 case Intrinsic::riscv_vlsseg7:
1740 case Intrinsic::riscv_vlsseg8:
1741 case Intrinsic::riscv_vloxseg2:
1742 case Intrinsic::riscv_vloxseg3:
1743 case Intrinsic::riscv_vloxseg4:
1744 case Intrinsic::riscv_vloxseg5:
1745 case Intrinsic::riscv_vloxseg6:
1746 case Intrinsic::riscv_vloxseg7:
1747 case Intrinsic::riscv_vloxseg8:
1748 case Intrinsic::riscv_vluxseg2:
1749 case Intrinsic::riscv_vluxseg3:
1750 case Intrinsic::riscv_vluxseg4:
1751 case Intrinsic::riscv_vluxseg5:
1752 case Intrinsic::riscv_vluxseg6:
1753 case Intrinsic::riscv_vluxseg7:
1754 case Intrinsic::riscv_vluxseg8:
1755 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1756 /*IsStore*/ false,
1757 /*IsUnitStrided*/ false);
1758 case Intrinsic::riscv_vlsseg2_mask:
1759 case Intrinsic::riscv_vlsseg3_mask:
1760 case Intrinsic::riscv_vlsseg4_mask:
1761 case Intrinsic::riscv_vlsseg5_mask:
1762 case Intrinsic::riscv_vlsseg6_mask:
1763 case Intrinsic::riscv_vlsseg7_mask:
1764 case Intrinsic::riscv_vlsseg8_mask:
1765 case Intrinsic::riscv_vloxseg2_mask:
1766 case Intrinsic::riscv_vloxseg3_mask:
1767 case Intrinsic::riscv_vloxseg4_mask:
1768 case Intrinsic::riscv_vloxseg5_mask:
1769 case Intrinsic::riscv_vloxseg6_mask:
1770 case Intrinsic::riscv_vloxseg7_mask:
1771 case Intrinsic::riscv_vloxseg8_mask:
1772 case Intrinsic::riscv_vluxseg2_mask:
1773 case Intrinsic::riscv_vluxseg3_mask:
1774 case Intrinsic::riscv_vluxseg4_mask:
1775 case Intrinsic::riscv_vluxseg5_mask:
1776 case Intrinsic::riscv_vluxseg6_mask:
1777 case Intrinsic::riscv_vluxseg7_mask:
1778 case Intrinsic::riscv_vluxseg8_mask:
1779 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 6,
1780 /*IsStore*/ false,
1781 /*IsUnitStrided*/ false);
1782 case Intrinsic::riscv_vsseg2:
1783 case Intrinsic::riscv_vsseg3:
1784 case Intrinsic::riscv_vsseg4:
1785 case Intrinsic::riscv_vsseg5:
1786 case Intrinsic::riscv_vsseg6:
1787 case Intrinsic::riscv_vsseg7:
1788 case Intrinsic::riscv_vsseg8:
1789 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1790 /*IsStore*/ true,
1791 /*IsUnitStrided*/ false);
1792 case Intrinsic::riscv_vsseg2_mask:
1793 case Intrinsic::riscv_vsseg3_mask:
1794 case Intrinsic::riscv_vsseg4_mask:
1795 case Intrinsic::riscv_vsseg5_mask:
1796 case Intrinsic::riscv_vsseg6_mask:
1797 case Intrinsic::riscv_vsseg7_mask:
1798 case Intrinsic::riscv_vsseg8_mask:
1799 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1800 /*IsStore*/ true,
1801 /*IsUnitStrided*/ false);
1802 case Intrinsic::riscv_vssseg2:
1803 case Intrinsic::riscv_vssseg3:
1804 case Intrinsic::riscv_vssseg4:
1805 case Intrinsic::riscv_vssseg5:
1806 case Intrinsic::riscv_vssseg6:
1807 case Intrinsic::riscv_vssseg7:
1808 case Intrinsic::riscv_vssseg8:
1809 case Intrinsic::riscv_vsoxseg2:
1810 case Intrinsic::riscv_vsoxseg3:
1811 case Intrinsic::riscv_vsoxseg4:
1812 case Intrinsic::riscv_vsoxseg5:
1813 case Intrinsic::riscv_vsoxseg6:
1814 case Intrinsic::riscv_vsoxseg7:
1815 case Intrinsic::riscv_vsoxseg8:
1816 case Intrinsic::riscv_vsuxseg2:
1817 case Intrinsic::riscv_vsuxseg3:
1818 case Intrinsic::riscv_vsuxseg4:
1819 case Intrinsic::riscv_vsuxseg5:
1820 case Intrinsic::riscv_vsuxseg6:
1821 case Intrinsic::riscv_vsuxseg7:
1822 case Intrinsic::riscv_vsuxseg8:
1823 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1824 /*IsStore*/ true,
1825 /*IsUnitStrided*/ false);
1826 case Intrinsic::riscv_vssseg2_mask:
1827 case Intrinsic::riscv_vssseg3_mask:
1828 case Intrinsic::riscv_vssseg4_mask:
1829 case Intrinsic::riscv_vssseg5_mask:
1830 case Intrinsic::riscv_vssseg6_mask:
1831 case Intrinsic::riscv_vssseg7_mask:
1832 case Intrinsic::riscv_vssseg8_mask:
1833 case Intrinsic::riscv_vsoxseg2_mask:
1834 case Intrinsic::riscv_vsoxseg3_mask:
1835 case Intrinsic::riscv_vsoxseg4_mask:
1836 case Intrinsic::riscv_vsoxseg5_mask:
1837 case Intrinsic::riscv_vsoxseg6_mask:
1838 case Intrinsic::riscv_vsoxseg7_mask:
1839 case Intrinsic::riscv_vsoxseg8_mask:
1840 case Intrinsic::riscv_vsuxseg2_mask:
1841 case Intrinsic::riscv_vsuxseg3_mask:
1842 case Intrinsic::riscv_vsuxseg4_mask:
1843 case Intrinsic::riscv_vsuxseg5_mask:
1844 case Intrinsic::riscv_vsuxseg6_mask:
1845 case Intrinsic::riscv_vsuxseg7_mask:
1846 case Intrinsic::riscv_vsuxseg8_mask:
1847 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5,
1848 /*IsStore*/ true,
1849 /*IsUnitStrided*/ false);
1850 }
1851}
1852
1854 const AddrMode &AM, Type *Ty,
1855 unsigned AS,
1856 Instruction *I) const {
1857 // No global is ever allowed as a base.
1858 if (AM.BaseGV)
1859 return false;
1860
1861 // None of our addressing modes allows a scalable offset
1862 if (AM.ScalableOffset)
1863 return false;
1864
1865 // RVV instructions only support register addressing.
1866 if (Subtarget.hasVInstructions() && isa<VectorType>(Ty))
1867 return AM.HasBaseReg && AM.Scale == 0 && !AM.BaseOffs;
1868
1869 // Require a 12-bit signed offset.
1870 if (!isInt<12>(AM.BaseOffs))
1871 return false;
1872
1873 switch (AM.Scale) {
1874 case 0: // "r+i" or just "i", depending on HasBaseReg.
1875 break;
1876 case 1:
1877 if (!AM.HasBaseReg) // allow "r+i".
1878 break;
1879 return false; // disallow "r+r" or "r+r+i".
1880 default:
1881 return false;
1882 }
1883
1884 return true;
1885}
1886
1888 return isInt<12>(Imm);
1889}
1890
1892 return isInt<12>(Imm);
1893}
1894
1895// On RV32, 64-bit integers are split into their high and low parts and held
1896// in two different registers, so the trunc is free since the low register can
1897// just be used.
1898// FIXME: Should we consider i64->i32 free on RV64 to match the EVT version of
1899// isTruncateFree?
1901 if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
1902 return false;
1903 unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
1904 unsigned DestBits = DstTy->getPrimitiveSizeInBits();
1905 return (SrcBits == 64 && DestBits == 32);
1906}
1907
1909 // We consider i64->i32 free on RV64 since we have good selection of W
1910 // instructions that make promoting operations back to i64 free in many cases.
1911 if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() ||
1912 !DstVT.isInteger())
1913 return false;
1914 unsigned SrcBits = SrcVT.getSizeInBits();
1915 unsigned DestBits = DstVT.getSizeInBits();
1916 return (SrcBits == 64 && DestBits == 32);
1917}
1918
1920 EVT SrcVT = Val.getValueType();
1921 // free truncate from vnsrl and vnsra
1922 if (Subtarget.hasVInstructions() &&
1923 (Val.getOpcode() == ISD::SRL || Val.getOpcode() == ISD::SRA) &&
1924 SrcVT.isVector() && VT2.isVector()) {
1925 unsigned SrcBits = SrcVT.getVectorElementType().getSizeInBits();
1926 unsigned DestBits = VT2.getVectorElementType().getSizeInBits();
1927 if (SrcBits == DestBits * 2) {
1928 return true;
1929 }
1930 }
1931 return TargetLowering::isTruncateFree(Val, VT2);
1932}
1933
1935 // Zexts are free if they can be combined with a load.
1936 // Don't advertise i32->i64 zextload as being free for RV64. It interacts
1937 // poorly with type legalization of compares preferring sext.
1938 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
1939 EVT MemVT = LD->getMemoryVT();
1940 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
1941 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
1942 LD->getExtensionType() == ISD::ZEXTLOAD))
1943 return true;
1944 }
1945
1946 return TargetLowering::isZExtFree(Val, VT2);
1947}
1948
1950 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
1951}
1952
1954 return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
1955}
1956
1958 return Subtarget.hasStdExtZbb() ||
1959 (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit());
1960}
1961
1963 return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
1964 (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit());
1965}
1966
1968 const Instruction &AndI) const {
1969 // We expect to be able to match a bit extraction instruction if the Zbs
1970 // extension is supported and the mask is a power of two. However, we
1971 // conservatively return false if the mask would fit in an ANDI instruction,
1972 // on the basis that it's possible the sinking+duplication of the AND in
1973 // CodeGenPrepare triggered by this hook wouldn't decrease the instruction
1974 // count and would increase code size (e.g. ANDI+BNEZ => BEXTI+BNEZ).
1975 if (!Subtarget.hasStdExtZbs() && !Subtarget.hasVendorXTHeadBs())
1976 return false;
1978 if (!Mask)
1979 return false;
1980 return !Mask->getValue().isSignedIntN(12) && Mask->getValue().isPowerOf2();
1981}
1982
1984 EVT VT = Y.getValueType();
1985
1986 // FIXME: Support vectors once we have tests.
1987 if (VT.isVector())
1988 return false;
1989
1990 return (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
1991 (!isa<ConstantSDNode>(Y) || cast<ConstantSDNode>(Y)->isOpaque());
1992}
1993
1995 // Zbs provides BEXT[_I], which can be used with SEQZ/SNEZ as a bit test.
1996 if (Subtarget.hasStdExtZbs())
1997 return X.getValueType().isScalarInteger();
1998 auto *C = dyn_cast<ConstantSDNode>(Y);
1999 // XTheadBs provides th.tst (similar to bexti), if Y is a constant
2000 if (Subtarget.hasVendorXTHeadBs())
2001 return C != nullptr;
2002 // We can use ANDI+SEQZ/SNEZ as a bit test. Y contains the bit position.
2003 return C && C->getAPIntValue().ule(10);
2004}
2005
2007 EVT VT) const {
2008 // Only enable for rvv.
2009 if (!VT.isVector() || !Subtarget.hasVInstructions())
2010 return false;
2011
2012 if (VT.isFixedLengthVector() && !isTypeLegal(VT))
2013 return false;
2014
2015 return true;
2016}
2017
2019 Type *Ty) const {
2020 assert(Ty->isIntegerTy());
2021
2022 unsigned BitSize = Ty->getIntegerBitWidth();
2023 if (BitSize > Subtarget.getXLen())
2024 return false;
2025
2026 // Fast path, assume 32-bit immediates are cheap.
2027 int64_t Val = Imm.getSExtValue();
2028 if (isInt<32>(Val))
2029 return true;
2030
2031 // A constant pool entry may be more aligned thant he load we're trying to
2032 // replace. If we don't support unaligned scalar mem, prefer the constant
2033 // pool.
2034 // TODO: Can the caller pass down the alignment?
2035 if (!Subtarget.enableUnalignedScalarMem())
2036 return true;
2037
2038 // Prefer to keep the load if it would require many instructions.
2039 // This uses the same threshold we use for constant pools but doesn't
2040 // check useConstantPoolForLargeInts.
2041 // TODO: Should we keep the load only when we're definitely going to emit a
2042 // constant pool?
2043
2045 return Seq.size() <= Subtarget.getMaxBuildIntsCost();
2046}
2047
2051 unsigned OldShiftOpcode, unsigned NewShiftOpcode,
2052 SelectionDAG &DAG) const {
2053 // One interesting pattern that we'd want to form is 'bit extract':
2054 // ((1 >> Y) & 1) ==/!= 0
2055 // But we also need to be careful not to try to reverse that fold.
2056
2057 // Is this '((1 >> Y) & 1)'?
2058 if (XC && OldShiftOpcode == ISD::SRL && XC->isOne())
2059 return false; // Keep the 'bit extract' pattern.
2060
2061 // Will this be '((1 >> Y) & 1)' after the transform?
2062 if (NewShiftOpcode == ISD::SRL && CC->isOne())
2063 return true; // Do form the 'bit extract' pattern.
2064
2065 // If 'X' is a constant, and we transform, then we will immediately
2066 // try to undo the fold, thus causing endless combine loop.
2067 // So only do the transform if X is not a constant. This matches the default
2068 // implementation of this function.
2069 return !XC;
2070}
2071
2072bool RISCVTargetLowering::canSplatOperand(unsigned Opcode, int Operand) const {
2073 switch (Opcode) {
2074 case Instruction::Add:
2075 case Instruction::Sub:
2076 case Instruction::Mul:
2077 case Instruction::And:
2078 case Instruction::Or:
2079 case Instruction::Xor:
2080 case Instruction::FAdd:
2081 case Instruction::FSub:
2082 case Instruction::FMul:
2083 case Instruction::FDiv:
2084 case Instruction::ICmp:
2085 case Instruction::FCmp:
2086 return true;
2087 case Instruction::Shl:
2088 case Instruction::LShr:
2089 case Instruction::AShr:
2090 case Instruction::UDiv:
2091 case Instruction::SDiv:
2092 case Instruction::URem:
2093 case Instruction::SRem:
2094 case Instruction::Select:
2095 return Operand == 1;
2096 default:
2097 return false;
2098 }
2099}
2100
2101
2103 if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions())
2104 return false;
2105
2106 if (canSplatOperand(I->getOpcode(), Operand))
2107 return true;
2108
2109 auto *II = dyn_cast<IntrinsicInst>(I);
2110 if (!II)
2111 return false;
2112
2113 switch (II->getIntrinsicID()) {
2114 case Intrinsic::fma:
2115 case Intrinsic::vp_fma:
2116 return Operand == 0 || Operand == 1;
2117 case Intrinsic::vp_shl:
2118 case Intrinsic::vp_lshr:
2119 case Intrinsic::vp_ashr:
2120 case Intrinsic::vp_udiv:
2121 case Intrinsic::vp_sdiv:
2122 case Intrinsic::vp_urem:
2123 case Intrinsic::vp_srem:
2124 case Intrinsic::ssub_sat:
2125 case Intrinsic::vp_ssub_sat:
2126 case Intrinsic::usub_sat:
2127 case Intrinsic::vp_usub_sat:
2128 return Operand == 1;
2129 // These intrinsics are commutative.
2130 case Intrinsic::vp_add:
2131 case Intrinsic::vp_mul:
2132 case Intrinsic::vp_and:
2133 case Intrinsic::vp_or:
2134 case Intrinsic::vp_xor:
2135 case Intrinsic::vp_fadd:
2136 case Intrinsic::vp_fmul:
2137 case Intrinsic::vp_icmp:
2138 case Intrinsic::vp_fcmp:
2139 case Intrinsic::smin:
2140 case Intrinsic::vp_smin:
2141 case Intrinsic::umin:
2142 case Intrinsic::vp_umin:
2143 case Intrinsic::smax:
2144 case Intrinsic::vp_smax:
2145 case Intrinsic::umax:
2146 case Intrinsic::vp_umax:
2147 case Intrinsic::sadd_sat:
2148 case Intrinsic::vp_sadd_sat:
2149 case Intrinsic::uadd_sat:
2150 case Intrinsic::vp_uadd_sat:
2151 // These intrinsics have 'vr' versions.
2152 case Intrinsic::vp_sub:
2153 case Intrinsic::vp_fsub:
2154 case Intrinsic::vp_fdiv:
2155 return Operand == 0 || Operand == 1;
2156 default:
2157 return false;
2158 }
2159}
2160
2161/// Check if sinking \p I's operands to I's basic block is profitable, because
2162/// the operands can be folded into a target instruction, e.g.
2163/// splats of scalars can fold into vector instructions.
// NOTE(review): the first line of the signature (presumably
// "bool RISCVTargetLowering::shouldSinkOperands(") is missing from this
// listing — confirm against the original source.
2165                                         Instruction *I, SmallVectorImpl<Use *> &Ops) const {
2166  using namespace llvm::PatternMatch;
2167
2168  if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions())
2169    return false;
2170
2171  // Don't sink splat operands if the target prefers it. Some targets requires
2172  // S2V transfer buffers and we can run out of them copying the same value
2173  // repeatedly.
2174  // FIXME: It could still be worth doing if it would improve vector register
2175  // pressure and prevent a vector spill.
2176  if (!Subtarget.sinkSplatOperands())
2177    return false;
2178
2179  for (auto OpIdx : enumerate(I->operands())) {
2180    if (!canSplatOperand(I, OpIdx.index()))
2181      continue;
2182
2183    Instruction *Op = dyn_cast<Instruction>(OpIdx.value().get());
2184    // Make sure we are not already sinking this operand
2185    if (!Op || any_of(Ops, [&](Use *U) { return U->get() == Op; }))
2186      continue;
2187
2188    // We are looking for a splat that can be sunk.
// NOTE(review): the match() call line (the shufflevector-splat guard) is
// missing from this listing; the trailing arguments below belong to it.
2190                              m_Undef(), m_ZeroMask())))
2191      continue;
2192
2193    // Don't sink i1 splats.
2194    if (cast<VectorType>(Op->getType())->getElementType()->isIntegerTy(1))
2195      continue;
2196
2197    // All uses of the shuffle should be sunk to avoid duplicating it across gpr
2198    // and vector registers
2199    for (Use &U : Op->uses()) {
2200      Instruction *Insn = cast<Instruction>(U.getUser());
2201      if (!canSplatOperand(Insn, U.getOperandNo()))
2202        return false;
2203    }
2204
// Sink both the splat's scalar source use and the use of the splat itself.
2205    Ops.push_back(&Op->getOperandUse(0));
2206    Ops.push_back(&OpIdx.value());
2207  }
2208  return true;
2209}
2210
// NOTE(review): the signature line is missing from this listing; this body
// reads like RISCVTargetLowering::shouldScalarizeBinop(SDValue VecOp) const —
// confirm against the original source. Decides whether a vector binop should
// be converted to its scalar form.
2212  unsigned Opc = VecOp.getOpcode();
2213
2214  // Assume target opcodes can't be scalarized.
2215  // TODO - do we have any exceptions?
2216  if (Opc >= ISD::BUILTIN_OP_END)
2217    return false;
2218
2219  // If the vector op is not supported, try to convert to scalar.
2220  EVT VecVT = VecOp.getValueType();
2221  if (!isOperationLegalOrCustomOrPromote(Opc, VecVT))
2222    return true;
2223
2224  // If the vector op is supported, but the scalar op is not, the transform may
2225  // not be worthwhile.
2226  // Permit a vector binary operation can be converted to scalar binary
2227  // operation which is custom lowered with illegal type.
2228  EVT ScalarVT = VecVT.getScalarType();
2229  return isOperationLegalOrCustomOrPromote(Opc, ScalarVT) ||
2230         isOperationCustom(Opc, ScalarVT);
2231}
2232
// NOTE(review): the first signature line (presumably
// "bool RISCVTargetLowering::isOffsetFoldingLegal(") is missing from this
// listing. Always answers false — see rationale below.
2234    const GlobalAddressSDNode *GA) const {
2235  // In order to maximise the opportunity for common subexpression elimination,
2236  // keep a separate ADD node for the global address offset instead of folding
2237  // it in the global address node. Later peephole optimisations may choose to
2238  // fold it back in when profitable.
2239  return false;
2240}
2241
2242// Returns 0-31 if the fli instruction is available for the type and this is
2243// legal FP immediate for the type. Returns -1 otherwise.
// NOTE(review): the signature line (presumably
// "int RISCVTargetLowering::getLegalZfaFPImm(const APFloat &Imm, EVT VT)
// const {") is missing from this listing.
2245  if (!Subtarget.hasStdExtZfa())
2246    return -1;
2247
// fli only exists for types whose scalar FP extension is present.
2248  bool IsSupportedVT = false;
2249  if (VT == MVT::f16) {
2250    IsSupportedVT = Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZvfh();
2251  } else if (VT == MVT::f32) {
2252    IsSupportedVT = true;
2253  } else if (VT == MVT::f64) {
2254    assert(Subtarget.hasStdExtD() && "Expect D extension");
2255    IsSupportedVT = true;
2256  }
2257
2258  if (!IsSupportedVT)
2259    return -1;
2260
2261  return RISCVLoadFPImm::getLoadFPImm(Imm);
2262}
2263
// NOTE(review): the first signature line (presumably
// "bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,") is
// missing from this listing. Reports whether an FP immediate is cheap enough
// to materialize rather than load from the constant pool.
2265                                       bool ForCodeSize) const {
2266  bool IsLegalVT = false;
2267  if (VT == MVT::f16)
2268    IsLegalVT = Subtarget.hasStdExtZfhminOrZhinxmin();
2269  else if (VT == MVT::f32)
2270    IsLegalVT = Subtarget.hasStdExtFOrZfinx();
2271  else if (VT == MVT::f64)
2272    IsLegalVT = Subtarget.hasStdExtDOrZdinx();
2273  else if (VT == MVT::bf16)
2274    IsLegalVT = Subtarget.hasStdExtZfbfmin();
2275
2276  if (!IsLegalVT)
2277    return false;
2278
// A Zfa fli-encodable immediate is always cheap.
2279  if (getLegalZfaFPImm(Imm, VT) >= 0)
2280    return true;
2281
2282  // Cannot create a 64 bit floating-point immediate value for rv32.
2283  if (Subtarget.getXLen() < VT.getScalarSizeInBits()) {
2284    // td can handle +0.0 or -0.0 already.
2285    // -0.0 can be created by fmv + fneg.
2286    return Imm.isZero();
2287  }
2288
2289  // Special case: fmv + fneg
2290  if (Imm.isNegZero())
2291    return true;
2292
2293  // Building an integer and then converting requires a fmv at the end of
2294  // the integer sequence. The fmv is not required for Zfinx.
2295  const int FmvCost = Subtarget.hasStdExtZfinx() ? 0 : 1;
2296  const int Cost =
2297      FmvCost + RISCVMatInt::getIntMatCost(Imm.bitcastToAPInt(),
2298                                           Subtarget.getXLen(), Subtarget);
// FPImmCost is the cl::opt threshold defined at the top of this file.
2299  return Cost <= FPImmCost;
2300}
2301
2302// TODO: This is very conservative.
// NOTE(review): the signature line and the first guard (presumably a check
// that ISD::EXTRACT_SUBVECTOR is legal/custom for ResVT) are missing from
// this listing — the lone "return false;" below is that guard's body.
2304                                                  unsigned Index) const {
2306    return false;
2307
2308  // Only support extracting a fixed from a fixed vector for now.
2309  if (ResVT.isScalableVector() || SrcVT.isScalableVector())
2310    return false;
2311
2312  EVT EltVT = ResVT.getVectorElementType();
2313  assert(EltVT == SrcVT.getVectorElementType() && "Should hold for node");
2314
2315  // The smallest type we can slide is i8.
2316  // TODO: We can extract index 0 from a mask vector without a slide.
2317  if (EltVT == MVT::i1)
2318    return false;
2319
2320  unsigned ResElts = ResVT.getVectorNumElements();
2321  unsigned SrcElts = SrcVT.getVectorNumElements();
2322
2323  unsigned MinVLen = Subtarget.getRealMinVLen();
2324  unsigned MinVLMAX = MinVLen / EltVT.getSizeInBits();
2325
2326  // If we're extracting only data from the first VLEN bits of the source
2327  // then we can always do this with an m1 vslidedown.vx. Restricting the
2328  // Index ensures we can use a vslidedown.vi.
2329  // TODO: We can generalize this when the exact VLEN is known.
2330  if (Index + ResElts <= MinVLMAX && Index < 31)
2331    return true;
2332
2333  // Convervatively only handle extracting half of a vector.
2334  // TODO: For sizes which aren't multiples of VLEN sizes, this may not be
2335  // a cheap extract. However, this case is important in practice for
2336  // shuffled extracts of longer vectors. How resolve?
2337  if ((ResElts * 2) != SrcElts)
2338    return false;
2339
2340  // Slide can support arbitrary index, but we only treat vslidedown.vi as
2341  // cheap.
2342  if (Index >= 32)
2343    return false;
2344
2345  // TODO: We can do arbitrary slidedowns, but for now only support extracting
2346  // the upper half of a vector until we have more test coverage.
2347  return Index == 0 || Index == ResElts;
2348}
2349
// NOTE(review): the first signature line (presumably
// "MVT RISCVTargetLowering::getRegisterTypeForCallingConv(LLVMContext
// &Context,") and the default TargetLowering delegation statement are missing
// from this listing.
2351                                             CallingConv::ID CC,
2352                                             EVT VT) const {
2353  // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2354  // We might still end up using a GPR but that will be decided based on ABI.
2355  if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2356      !Subtarget.hasStdExtZfhminOrZhinxmin())
2357    return MVT::f32;
2358
2360
2361  return PartVT;
2362}
2363
// NOTE(review): the first signature line (presumably
// "unsigned RISCVTargetLowering::getNumRegistersForCallingConv(LLVMContext
// &Context,") and the final TargetLowering delegation return are missing from
// this listing.
2365                                              CallingConv::ID CC,
2366                                              EVT VT) const {
2367  // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2368  // We might still end up using a GPR but that will be decided based on ABI.
2369  if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2370      !Subtarget.hasStdExtZfhminOrZhinxmin())
2371    return 1;
2372
2374}
2375
// NOTE(review): the signature's first line and the "unsigned NumRegs = ..."
// declaration (the TargetLowering::getVectorTypeBreakdown delegation) are
// missing from this listing.
2377    LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
2378    unsigned &NumIntermediates, MVT &RegisterVT) const {
2380      Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
2381
2382  return NumRegs;
2383}
2384
2385// Changes the condition code and swaps operands if necessary, so the SetCC
2386// operation matches one of the comparisons supported directly by branches
2387// in the RISC-V ISA. May adjust compares to favor compare with 0 over compare
2388// with 1/-1.
2389static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
2390                                    ISD::CondCode &CC, SelectionDAG &DAG) {
2391  // If this is a single bit test that can't be handled by ANDI, shift the
2392  // bit to be tested to the MSB and perform a signed compare with 0.
2393  if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
2394      LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
2395      isa<ConstantSDNode>(LHS.getOperand(1))) {
2396    uint64_t Mask = LHS.getConstantOperandVal(1);
2397    if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) {
2398      unsigned ShAmt = 0;
2399      if (isPowerOf2_64(Mask)) {
// NOTE(review): a statement updating CC (to a signed-compare-with-0 form) is
// missing from this listing at this point.
2401        ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
2402      } else {
2403        ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask);
2404      }
2405
2406      LHS = LHS.getOperand(0);
2407      if (ShAmt != 0)
2408        LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
2409                          DAG.getConstant(ShAmt, DL, LHS.getValueType()));
2410      return;
2411    }
2412  }
2413
// Favor compares against 0 over compares against +/-1.
2414  if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
2415    int64_t C = RHSC->getSExtValue();
2416    switch (CC) {
2417    default: break;
2418    case ISD::SETGT:
2419      // Convert X > -1 to X >= 0.
2420      if (C == -1) {
2421        RHS = DAG.getConstant(0, DL, RHS.getValueType());
2422        CC = ISD::SETGE;
2423        return;
2424      }
2425      break;
2426    case ISD::SETLT:
2427      // Convert X < 1 to 0 >= X.
2428      if (C == 1) {
2429        RHS = LHS;
2430        LHS = DAG.getConstant(0, DL, RHS.getValueType());
2431        CC = ISD::SETGE;
2432        return;
2433      }
2434      break;
2435    }
2436  }
2437
// GT/LE/UGT/ULE have no direct branch form; swap operands to GE/LT/UGE/ULT.
2438  switch (CC) {
2439  default:
2440    break;
2441  case ISD::SETGT:
2442  case ISD::SETLE:
2443  case ISD::SETUGT:
2444  case ISD::SETULE:
// NOTE(review): the statement swapping the condition code (presumably via
// ISD::getSetCCSwappedOperands) is missing from this listing.
2446    std::swap(LHS, RHS);
2447    break;
2448  }
2449}
2450
// NOTE(review): the signature line (presumably
// "RISCVII::VLMUL RISCVTargetLowering::getLMUL(MVT VT) {") is missing from
// this listing. Maps a scalable vector (or RVV tuple) type to its LMUL.
2452  if (VT.isRISCVVectorTuple()) {
2453    if (VT.SimpleTy >= MVT::riscv_nxv1i8x2 &&
2454        VT.SimpleTy <= MVT::riscv_nxv1i8x8)
2455      return RISCVII::LMUL_F8;
2456    if (VT.SimpleTy >= MVT::riscv_nxv2i8x2 &&
2457        VT.SimpleTy <= MVT::riscv_nxv2i8x8)
2458      return RISCVII::LMUL_F4;
2459    if (VT.SimpleTy >= MVT::riscv_nxv4i8x2 &&
2460        VT.SimpleTy <= MVT::riscv_nxv4i8x8)
2461      return RISCVII::LMUL_F2;
2462    if (VT.SimpleTy >= MVT::riscv_nxv8i8x2 &&
2463        VT.SimpleTy <= MVT::riscv_nxv8i8x8)
2464      return RISCVII::LMUL_1;
2465    if (VT.SimpleTy >= MVT::riscv_nxv16i8x2 &&
2466        VT.SimpleTy <= MVT::riscv_nxv16i8x4)
2467      return RISCVII::LMUL_2;
2468    if (VT.SimpleTy == MVT::riscv_nxv32i8x2)
2469      return RISCVII::LMUL_4;
2470    llvm_unreachable("Invalid vector tuple type LMUL.");
2471  }
2472
2473  assert(VT.isScalableVector() && "Expecting a scalable vector type");
2474  unsigned KnownSize = VT.getSizeInBits().getKnownMinValue();
// i1 masks are measured in bits; scale so the switch below is uniform.
2475  if (VT.getVectorElementType() == MVT::i1)
2476    KnownSize *= 8;
2477
// NOTE(review): the return statements for each case (mapping 8..512 known
// bits to LMUL_F8..LMUL_8) are missing from this listing.
2478  switch (KnownSize) {
2479  default:
2480    llvm_unreachable("Invalid LMUL.");
2481  case 8:
2483  case 16:
2485  case 32:
2487  case 64:
2489  case 128:
2491  case 256:
2493  case 512:
2495  }
2496}
2497
// NOTE(review): the signature line (presumably
// "unsigned RISCVTargetLowering::getRegClassIDForLMUL(RISCVII::VLMUL LMul) {")
// and the case labels preceding each return are missing from this listing.
// Fractional LMULs and LMUL_1 share the VR class; larger LMULs get grouped
// register classes.
2499  switch (LMul) {
2500  default:
2501    llvm_unreachable("Invalid LMUL.");
2506    return RISCV::VRRegClassID;
2508    return RISCV::VRM2RegClassID;
2510    return RISCV::VRM4RegClassID;
2512    return RISCV::VRM8RegClassID;
2513  }
2514}
2515
// NOTE(review): the signature line (presumably
// "unsigned RISCVTargetLowering::getSubregIndexByMVT(MVT VT, unsigned Index)
// {") is missing from this listing. Picks the sub_vrmN_* subregister index
// matching VT's LMUL for the given field index.
2517  RISCVII::VLMUL LMUL = getLMUL(VT);
2518  if (LMUL == RISCVII::VLMUL::LMUL_F8 ||
2519      LMUL == RISCVII::VLMUL::LMUL_F4 ||
2520      LMUL == RISCVII::VLMUL::LMUL_F2 ||
2521      LMUL == RISCVII::VLMUL::LMUL_1) {
2522    static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
2523                  "Unexpected subreg numbering");
2524    return RISCV::sub_vrm1_0 + Index;
2525  }
2526  if (LMUL == RISCVII::VLMUL::LMUL_2) {
2527    static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
2528                  "Unexpected subreg numbering");
2529    return RISCV::sub_vrm2_0 + Index;
2530  }
2531  if (LMUL == RISCVII::VLMUL::LMUL_4) {
2532    static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
2533                  "Unexpected subreg numbering");
2534    return RISCV::sub_vrm4_0 + Index;
2535  }
2536  llvm_unreachable("Invalid vector type.");
2537}
2538
// NOTE(review): the signature line (presumably
// "unsigned RISCVTargetLowering::getRegClassIDForVecVT(MVT VT) {") is missing
// from this listing. Maps a vector (or RVV tuple) type to a register class ID.
2540  if (VT.isRISCVVectorTuple()) {
2541    unsigned NF = VT.getRISCVVectorTupleNumFields();
// Registers each field occupies: total bits / (fields * bits-per-VR block).
2542    unsigned RegsPerField = std::max(1U, (unsigned)VT.getSizeInBits() /
2543                                             (NF * RISCV::RVVBitsPerBlock));
2544    switch (RegsPerField) {
2545    case 1:
2546      if (NF == 2)
2547        return RISCV::VRN2M1RegClassID;
2548      if (NF == 3)
2549        return RISCV::VRN3M1RegClassID;
2550      if (NF == 4)
2551        return RISCV::VRN4M1RegClassID;
2552      if (NF == 5)
2553        return RISCV::VRN5M1RegClassID;
2554      if (NF == 6)
2555        return RISCV::VRN6M1RegClassID;
2556      if (NF == 7)
2557        return RISCV::VRN7M1RegClassID;
2558      if (NF == 8)
2559        return RISCV::VRN8M1RegClassID;
2560      break;
2561    case 2:
2562      if (NF == 2)
2563        return RISCV::VRN2M2RegClassID;
2564      if (NF == 3)
2565        return RISCV::VRN3M2RegClassID;
2566      if (NF == 4)
2567        return RISCV::VRN4M2RegClassID;
2568      break;
2569    case 4:
2570      assert(NF == 2);
2571      return RISCV::VRN2M4RegClassID;
2572    default:
2573      break;
2574    }
2575    llvm_unreachable("Invalid vector tuple type RegClass.");
2576  }
2577
// Mask vectors always live in a single VR regardless of element count.
2578  if (VT.getVectorElementType() == MVT::i1)
2579    return RISCV::VRRegClassID;
2580  return getRegClassIDForLMUL(getLMUL(VT));
2581}
2582
2583// Attempt to decompose a subvector insert/extract between VecVT and
2584// SubVecVT via subregister indices. Returns the subregister index that
2585// can perform the subvector insert/extract with the given element index, as
2586// well as the index corresponding to any leftover subvectors that must be
2587// further inserted/extracted within the register class for SubVecVT.
2588std::pair<unsigned, unsigned>
// NOTE(review): the qualified-name line of this signature (presumably
// "RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(") is
// missing from this listing.
2590    MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
2591    const RISCVRegisterInfo *TRI) {
2592  static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
2593                 RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
2594                 RISCV::VRM2RegClassID > RISCV::VRRegClassID),
2595                "Register classes not ordered");
2596  unsigned VecRegClassID = getRegClassIDForVecVT(VecVT);
2597  unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT);
2598
2599  // If VecVT is a vector tuple type, either it's the tuple type with same
2600  // RegClass with SubVecVT or SubVecVT is a actually a subvector of the VecVT.
2601  if (VecVT.isRISCVVectorTuple()) {
2602    if (VecRegClassID == SubRegClassID)
2603      return {RISCV::NoSubRegister, 0};
2604
2605    assert(SubVecVT.isScalableVector() &&
2606           "Only allow scalable vector subvector.");
2607    assert(getLMUL(VecVT) == getLMUL(SubVecVT) &&
2608           "Invalid vector tuple insert/extract for vector and subvector with "
2609           "different LMUL.");
2610    return {getSubregIndexByMVT(VecVT, InsertExtractIdx), 0};
2611  }
2612
2613  // Try to compose a subregister index that takes us from the incoming
2614  // LMUL>1 register class down to the outgoing one. At each step we half
2615  // the LMUL:
2616  //   nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
2617  // Note that this is not guaranteed to find a subregister index, such as
2618  // when we are extracting from one VR type to another.
2619  unsigned SubRegIdx = RISCV::NoSubRegister;
2620  for (const unsigned RCID :
2621       {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
2622    if (VecRegClassID > RCID && SubRegClassID <= RCID) {
2623      VecVT = VecVT.getHalfNumVectorElementsVT();
2624      bool IsHi =
2625          InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue();
2626      SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,
2627                                            getSubregIndexByMVT(VecVT, IsHi));
2628      if (IsHi)
2629        InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue();
2630    }
2631  return {SubRegIdx, InsertExtractIdx};
2632}
2633
2634// Permit combining of mask vectors as BUILD_VECTOR never expands to scalar
2635// stores for those types.
2636bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const {
2637 return !Subtarget.useRVVForFixedLengthVectors() ||
2638 (VT.isFixedLengthVector() && VT.getVectorElementType() == MVT::i1);
2639}
2640
// NOTE(review): the signature line (presumably
// "bool RISCVTargetLowering::isLegalElementTypeForRVV(EVT ScalarTy) const {")
// is missing from this listing. Reports whether a scalar element type is
// supported by the available RVV instruction subsets.
2642  if (!ScalarTy.isSimple())
2643    return false;
2644  switch (ScalarTy.getSimpleVT().SimpleTy) {
2645  case MVT::iPTR:
// Pointers are 64-bit on rv64, so they need the I64 vector instructions there.
2646    return Subtarget.is64Bit() ? Subtarget.hasVInstructionsI64() : true;
2647  case MVT::i8:
2648  case MVT::i16:
2649  case MVT::i32:
2650    return true;
2651  case MVT::i64:
2652    return Subtarget.hasVInstructionsI64();
2653  case MVT::f16:
2654    return Subtarget.hasVInstructionsF16();
2655  case MVT::f32:
2656    return Subtarget.hasVInstructionsF32();
2657  case MVT::f64:
2658    return Subtarget.hasVInstructionsF64();
2659  default:
2660    return false;
2661  }
2662}
2663
2664
// Minimum number of divisions by the same divisor before forming the
// reciprocal and replacing them with multiplications (cl::opt controlled;
// see the "-fp-repeated-divisors" option at the top of this file).
2665unsigned RISCVTargetLowering::combineRepeatedFPDivisors() const {
2666  return NumRepeatedDivisors;
2667}
2668
// NOTE(review): the signature line (presumably
// "static SDValue getVLOperand(SDValue Op) {") and the declaration line for
// the intrinsic-info pointer II are missing from this listing. Returns the VL
// operand of a RISC-V vector intrinsic node, or an empty SDValue.
2670  assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2671          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
2672         "Unexpected opcode");
2673  bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
// With a chain, operand 0 is the chain and the intrinsic ID shifts to 1.
2674  unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
2676      RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
2677  if (!II)
2678    return SDValue();
2679  return Op.getOperand(II->VLOperand + 1 + HasChain);
2680}
2681
// NOTE(review): the first signature line (presumably
// "static bool useRVVForFixedLengthVectorVT(MVT VT,") is missing from this
// listing. Decides whether a fixed-length vector type should be lowered using
// the scalable RVV instruction set.
2683                                            const RISCVSubtarget &Subtarget) {
2684  assert(VT.isFixedLengthVector() && "Expected a fixed length vector type!");
2685  if (!Subtarget.useRVVForFixedLengthVectors())
2686    return false;
2687
2688  // We only support a set of vector types with a consistent maximum fixed size
2689  // across all supported vector element types to avoid legalization issues.
2690  // Therefore -- since the largest is v1024i8/v512i16/etc -- the largest
2691  // fixed-length vector type we support is 1024 bytes.
2692  if (VT.getFixedSizeInBits() > 1024 * 8)
2693    return false;
2694
2695  unsigned MinVLen = Subtarget.getRealMinVLen();
2696
2697  MVT EltVT = VT.getVectorElementType();
2698
2699  // Don't use RVV for vectors we cannot scalarize if required.
2700  switch (EltVT.SimpleTy) {
2701  // i1 is supported but has different rules.
2702  default:
2703    return false;
2704  case MVT::i1:
2705    // Masks can only use a single register.
2706    if (VT.getVectorNumElements() > MinVLen)
2707      return false;
2708    MinVLen /= 8;
2709    break;
2710  case MVT::i8:
2711  case MVT::i16:
2712  case MVT::i32:
2713    break;
2714  case MVT::i64:
2715    if (!Subtarget.hasVInstructionsI64())
2716      return false;
2717    break;
2718  case MVT::f16:
2719    if (!Subtarget.hasVInstructionsF16Minimal())
2720      return false;
2721    break;
2722  case MVT::bf16:
2723    if (!Subtarget.hasVInstructionsBF16Minimal())
2724      return false;
2725    break;
2726  case MVT::f32:
2727    if (!Subtarget.hasVInstructionsF32())
2728      return false;
2729    break;
2730  case MVT::f64:
2731    if (!Subtarget.hasVInstructionsF64())
2732      return false;
2733    break;
2734  }
2735
2736  // Reject elements larger than ELEN.
2737  if (EltVT.getSizeInBits() > Subtarget.getELen())
2738    return false;
2739
2740  unsigned LMul = divideCeil(VT.getSizeInBits(), MinVLen);
2741  // Don't use RVV for types that don't fit.
2742  if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
2743    return false;
2744
2745  // TODO: Perhaps an artificial restriction, but worth having whilst getting
2746  // the base fixed length RVV support in place.
2747  if (!VT.isPow2VectorType())
2748    return false;
2749
2750  return true;
2751}
2752
// Member wrapper: delegates to the file-local helper above, supplying this
// target's subtarget.
2753bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
2754  return ::useRVVForFixedLengthVectorVT(VT, Subtarget);
2755}
2756
2757// Return the largest legal scalable vector type that matches VT's element type.
// NOTE(review): the first signature line (presumably
// "static MVT getContainerForFixedLengthVector(const TargetLowering &TLI, MVT
// VT,") is missing from this listing.
2759                                            const RISCVSubtarget &Subtarget) {
2760  // This may be called before legal types are setup.
2761  assert(((VT.isFixedLengthVector() && TLI.isTypeLegal(VT)) ||
2762          useRVVForFixedLengthVectorVT(VT, Subtarget)) &&
2763         "Expected legal fixed length vector!");
2764
2765  unsigned MinVLen = Subtarget.getRealMinVLen();
2766  unsigned MaxELen = Subtarget.getELen();
2767
2768  MVT EltVT = VT.getVectorElementType();
2769  switch (EltVT.SimpleTy) {
2770  default:
2771    llvm_unreachable("unexpected element type for RVV container");
2772  case MVT::i1:
2773  case MVT::i8:
2774  case MVT::i16:
2775  case MVT::i32:
2776  case MVT::i64:
2777  case MVT::bf16:
2778  case MVT::f16:
2779  case MVT::f32:
2780  case MVT::f64: {
2781    // We prefer to use LMUL=1 for VLEN sized types. Use fractional lmuls for
2782    // narrower types. The smallest fractional LMUL we support is 8/ELEN. Within
2783    // each fractional LMUL we support SEW between 8 and LMUL*ELEN.
// NOTE(review): the initializer expression for NumElts is missing from this
// listing (presumably element-count scaled by RVVBitsPerBlock / MinVLen).
2784    unsigned NumElts =
2786    NumElts = std::max(NumElts, RISCV::RVVBitsPerBlock / MaxELen);
2787    assert(isPowerOf2_32(NumElts) && "Expected power of 2 NumElts");
2788    return MVT::getScalableVectorVT(EltVT, NumElts);
2789  }
2790  }
2791}
2792
// NOTE(review): the signature and call lines of this overload (presumably a
// "static MVT getContainerForFixedLengthVector(SelectionDAG &DAG, MVT VT, ...)"
// wrapper forwarding to the TLI-based overload above) are missing from this
// listing.
2794                                            const RISCVSubtarget &Subtarget) {
2796                                           Subtarget);
2797}
2798
// NOTE(review): the signature line of this member wrapper (presumably
// "MVT RISCVTargetLowering::getContainerForFixedLengthVector(MVT VT) const {")
// is missing from this listing.
2800  return ::getContainerForFixedLengthVector(*this, VT, getSubtarget());
2801}
2802
2803// Grow V to consume an entire RVV register.
// NOTE(review): the first signature line (presumably
// "static SDValue convertToScalableVector(MVT VT, SDValue V, SelectionDAG
// &DAG,") is missing from this listing.
2805                                        const RISCVSubtarget &Subtarget) {
2806  assert(VT.isScalableVector() &&
2807         "Expected to convert into a scalable vector!");
2808  assert(V.getValueType().isFixedLengthVector() &&
2809         "Expected a fixed length vector operand!");
2810  SDLoc DL(V);
// Insert the fixed vector at element 0 of an undef scalable container.
2811  SDValue Zero = DAG.getVectorIdxConstant(0, DL);
2812  return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
2813}
2814
2815// Shrink V so it's just big enough to maintain a VT's worth of data.
// NOTE(review): the first signature line and the first assert line (presumably
// "static SDValue convertFromScalableVector(MVT VT, SDValue V, ..." and
// "assert(VT.isFixedLengthVector() &&") are missing from this listing.
2817                                          const RISCVSubtarget &Subtarget) {
2819         "Expected to convert into a fixed length vector!");
2820  assert(V.getValueType().isScalableVector() &&
2821         "Expected a scalable vector operand!");
2822  SDLoc DL(V);
// Extract the fixed-length prefix starting at element 0.
2823  SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
2824  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
2825}
2826
2827/// Return the type of the mask type suitable for masking the provided
2828/// vector type. This is simply an i1 element type vector of the same
2829/// (possibly scalable) length.
2830static MVT getMaskTypeFor(MVT VecVT) {
2831  assert(VecVT.isVector());
// NOTE(review): the declaration of EC (presumably
// "ElementCount EC = VecVT.getVectorElementCount();") is missing from this
// listing.
2833  return MVT::getVectorVT(MVT::i1, EC);
2834}
2835
2836/// Creates an all ones mask suitable for masking a vector of type VecTy with
2837/// vector length VL. .
2838static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL,
2839 SelectionDAG &DAG) {
2840 MVT MaskVT = getMaskTypeFor(VecVT);
2841 return DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
2842}
2843
2844static std::pair<SDValue, SDValue>
// NOTE(review): the name line of this signature (presumably
// "getDefaultScalableVLOps(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG,")
// is missing from this listing. Produces the {mask, VL} pair for a scalable
// type using the VLMAX sentinel (register X0) as the vector length.
2846                          const RISCVSubtarget &Subtarget) {
2847  assert(VecVT.isScalableVector() && "Expecting a scalable vector");
2848  SDValue VL = DAG.getRegister(RISCV::X0, Subtarget.getXLenVT());
2849  SDValue Mask = getAllOnesMask(VecVT, VL, DL, DAG);
2850  return {Mask, VL};
2851}
2852
2853static std::pair<SDValue, SDValue>
2854getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL,
2855 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
2856 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2857 SDValue VL = DAG.getConstant(NumElts, DL, Subtarget.getXLenVT());
2858 SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
2859 return {Mask, VL};
2860}
2861
2862// Gets the two common "VL" operands: an all-ones mask and the vector length.
2863// VecVT is a vector type, either fixed-length or scalable, and ContainerVT is
2864// the vector type that the fixed-length vector is contained in. Otherwise if
2865// VecVT is scalable, then ContainerVT should be the same as VecVT.
2866static std::pair<SDValue, SDValue>
2867getDefaultVLOps(MVT VecVT, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG,
2868 const RISCVSubtarget &Subtarget) {
2869 if (VecVT.isFixedLengthVector())
2870 return getDefaultVLOps(VecVT.getVectorNumElements(), ContainerVT, DL, DAG,
2871 Subtarget);
2872 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2873 return getDefaultScalableVLOps(ContainerVT, DL, DAG, Subtarget);
2874}
2875
// NOTE(review): the first signature line (presumably
// "SDValue RISCVTargetLowering::computeVLMax(MVT VecVT, const SDLoc &DL,") is
// missing from this listing. Returns VecVT's element count as an XLen-typed
// DAG value.
2877                                          SelectionDAG &DAG) const {
2878  assert(VecVT.isScalableVector() && "Expected scalable vector");
2879  return DAG.getElementCount(DL, Subtarget.getXLenVT(),
2880                             VecVT.getVectorElementCount());
2881}
2882
// Compute the {MinVLMAX, MaxVLMAX} pair for VecVT given the subtarget's real
// VLEN bounds.
2883std::pair<unsigned, unsigned>
// NOTE(review): the qualified-name line of this signature (presumably
// "RISCVTargetLowering::computeVLMAXBounds(MVT VecVT,") is missing from this
// listing.
2885                                        const RISCVSubtarget &Subtarget) {
2886  assert(VecVT.isScalableVector() && "Expected scalable vector");
2887
2888  unsigned EltSize = VecVT.getScalarSizeInBits();
2889  unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();
2890
2891  unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
2892  unsigned MaxVLMAX =
2893      RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
2894
2895  unsigned VectorBitsMin = Subtarget.getRealMinVLen();
2896  unsigned MinVLMAX =
2897      RISCVTargetLowering::computeVLMAX(VectorBitsMin, EltSize, MinSize);
2898
2899  return std::make_pair(MinVLMAX, MaxVLMAX);
2900}
2901
2902// The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very few
2903// of either is (currently) supported. This can get us into an infinite loop
2904// where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR
2905// as a ..., etc.
2906// Until either (or both) of these can reliably lower any node, reporting that
2907// we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks
2908// the infinite loop. Note that this lowers BUILD_VECTOR through the stack,
2909// which is not desirable.
// NOTE(review): the first signature line (presumably
// "bool RISCVTargetLowering::shouldExpandBuildVectorWithShuffles(") is
// missing from this listing.
2911                                                EVT VT, unsigned DefinedValues) const {
2912  return false;
2913}
2913}
2914
// NOTE(review): the signature line (presumably
// "InstructionCost RISCVTargetLowering::getLMULCost(MVT VT) const {") and the
// early-return statement for the non-vector case are missing from this
// listing. Estimates the per-instruction cost implied by VT's LMUL and the
// subtarget's DLEN factor.
2916  // TODO: Here assume reciprocal throughput is 1 for LMUL_1, it is
2917  // implementation-defined.
2918  if (!VT.isVector())
2920  unsigned DLenFactor = Subtarget.getDLenFactor();
2921  unsigned Cost;
2922  if (VT.isScalableVector()) {
2923    unsigned LMul;
2924    bool Fractional;
2925    std::tie(LMul, Fractional) =
2927    if (Fractional)
2928      Cost = LMul <= DLenFactor ? (DLenFactor / LMul) : 1;
2929    else
2930      Cost = (LMul * DLenFactor);
2931  } else {
// Fixed-length: cost scales with how many DLEN-wide chunks VT occupies.
2932    Cost = divideCeil(VT.getSizeInBits(), Subtarget.getRealMinVLen() / DLenFactor);
2933  }
2934  return Cost;
2935}
2936
2937
2938/// Return the cost of a vrgather.vv instruction for the type VT. vrgather.vv
2939/// is generally quadratic in the number of vreg implied by LMUL. Note that
2940/// operand (index and possibly mask) are handled separately.
2944
2945/// Return the cost of a vrgather.vi (or vx) instruction for the type VT.
2946/// vrgather.vi/vx may be linear in the number of vregs implied by LMUL,
2947/// or may track the vrgather.vv cost. It is implementation-dependent.
2951
2952/// Return the cost of a vslidedown.vx or vslideup.vx instruction
2953/// for the type VT. (This does not cover the vslide1up or vslide1down
2954/// variants.) Slides may be linear in the number of vregs implied by LMUL,
2955/// or may track the vrgather.vv cost. It is implementation-dependent.
2959
2960/// Return the cost of a vslidedown.vi or vslideup.vi instruction
2961/// for the type VT. (This does not cover the vslide1up or vslide1down
2962/// variants.) Slides may be linear in the number of vregs implied by LMUL,
2963/// or may track the vrgather.vv cost. It is implementation-dependent.
2967
// NOTE(review): the first signature line of this static lowering helper is
// missing from this listing; per the body it promotes an FP operation whose
// result is f16 (without Zfh/Zhinx) or bf16 to f32 and rounds the result back
// — confirm the exact name against the original source.
2969                                  const RISCVSubtarget &Subtarget) {
2970  // f16 conversions are promoted to f32 when Zfh/Zhinx are not supported.
2971  // bf16 conversions are always promoted to f32.
2972  if ((Op.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
2973      Op.getValueType() == MVT::bf16) {
2974    bool IsStrict = Op->isStrictFPOpcode();
2975
2976    SDLoc DL(Op);
2977    if (IsStrict) {
// Strict ops carry a chain: thread it through the f32 op and the FP_ROUND.
2978      SDValue Val = DAG.getNode(Op.getOpcode(), DL, {MVT::f32, MVT::Other},
2979                                {Op.getOperand(0), Op.getOperand(1)});
2980      return DAG.getNode(ISD::STRICT_FP_ROUND, DL,
2981                         {Op.getValueType(), MVT::Other},
2982                         {Val.getValue(1), Val.getValue(0),
2983                          DAG.getIntPtrConstant(0, DL, /*isTarget=*/true)});
2984    }
2985    return DAG.getNode(
2986        ISD::FP_ROUND, DL, Op.getValueType(),
2987        DAG.getNode(Op.getOpcode(), DL, MVT::f32, Op.getOperand(0)),
2988        DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
2989  }
2990
2991  // Other operations are legal.
2992  return Op;
2993}
2994
// NOTE(review): the first signature line (presumably
// "static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG,") is
// missing from this listing, as are a few statement lines flagged below.
2996                                  const RISCVSubtarget &Subtarget) {
2997  // RISC-V FP-to-int conversions saturate to the destination register size, but
2998  // don't produce 0 for nan. We can use a conversion instruction and fix the
2999  // nan case with a compare and a select.
3000  SDValue Src = Op.getOperand(0);
3001
3002  MVT DstVT = Op.getSimpleValueType();
3003  EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
3004
3005  bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;
3006
3007  if (!DstVT.isVector()) {
3008    // For bf16 or for f16 in absense of Zfh, promote to f32, then saturate
3009    // the result.
3010    if ((Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
3011        Src.getValueType() == MVT::bf16) {
3012      Src = DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, Src);
3013    }
3014
3015    unsigned Opc;
3016    if (SatVT == DstVT)
3017      Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
3018    else if (DstVT == MVT::i64 && SatVT == MVT::i32)
// NOTE(review): the assignment selecting the 32-in-64 conversion opcode
// (presumably FCVT_W_RV64 / FCVT_WU_RV64) is missing from this listing.
3020    else
3021      return SDValue();
3022    // FIXME: Support other SatVTs by clamping before or after the conversion.
3023
3024    SDLoc DL(Op);
3025    SDValue FpToInt = DAG.getNode(
3026        Opc, DL, DstVT, Src,
// NOTE(review): the rounding-mode operand line of this getNode call and the
// condition-code argument of the getSelectCC below are missing from this
// listing.
3028
3029    if (Opc == RISCVISD::FCVT_WU_RV64)
3030      FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
3031
// NaN compares unordered with itself; select 0 in that case.
3032    SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
3033    return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt,
3035  }
3036
3037  // Vectors.
3038
3039  MVT DstEltVT = DstVT.getVectorElementType();
3040  MVT SrcVT = Src.getSimpleValueType();
3041  MVT SrcEltVT = SrcVT.getVectorElementType();
3042  unsigned SrcEltSize = SrcEltVT.getSizeInBits();
3043  unsigned DstEltSize = DstEltVT.getSizeInBits();
3044
3045  // Only handle saturating to the destination type.
3046  if (SatVT != DstEltVT)
3047    return SDValue();
3048
3049  MVT DstContainerVT = DstVT;
3050  MVT SrcContainerVT = SrcVT;
3051  if (DstVT.isFixedLengthVector()) {
3052    DstContainerVT = getContainerForFixedLengthVector(DAG, DstVT, Subtarget);
3053    SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
3054    assert(DstContainerVT.getVectorElementCount() ==
3055               SrcContainerVT.getVectorElementCount() &&
3056           "Expected same element count");
3057    Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
3058  }
3059
3060  SDLoc DL(Op);
3061
3062  auto [Mask, VL] = getDefaultVLOps(DstVT, DstContainerVT, DL, DAG, Subtarget);
3063
// SETNE on (Src, Src) is true exactly for NaN lanes.
3064  SDValue IsNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
3065                              {Src, Src, DAG.getCondCode(ISD::SETNE),
3066                               DAG.getUNDEF(Mask.getValueType()), Mask, VL});
3067
3068  // Need to widen by more than 1 step, promote the FP type, then do a widening
3069  // convert.
3070  if (DstEltSize > (2 * SrcEltSize)) {
3071    assert(SrcContainerVT.getVectorElementType() == MVT::f16 && "Unexpected VT!");
3072    MVT InterVT = SrcContainerVT.changeVectorElementType(MVT::f32);
3073    Src = DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterVT, Src, Mask, VL);
3074  }
3075
3076  MVT CvtContainerVT = DstContainerVT;
3077  MVT CvtEltVT = DstEltVT;
3078  if (SrcEltSize > (2 * DstEltSize)) {
3079    CvtEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
3080    CvtContainerVT = CvtContainerVT.changeVectorElementType(CvtEltVT);
3081  }
3082
// NOTE(review): the initializer expression selecting the signed/unsigned
// VFCVT opcode is missing from this listing.
3083  unsigned RVVOpc =
3085  SDValue Res = DAG.getNode(RVVOpc, DL, CvtContainerVT, Src, Mask, VL);
3086
// Narrow step-by-step with saturating truncates until DstContainerVT.
3087  while (CvtContainerVT != DstContainerVT) {
3088    CvtEltVT = MVT::getIntegerVT(CvtEltVT.getSizeInBits() / 2);
3089    CvtContainerVT = CvtContainerVT.changeVectorElementType(CvtEltVT);
3090    // Rounding mode here is arbitrary since we aren't shifting out any bits.
// NOTE(review): the unsigned alternative of ClipOpc (presumably
// TRUNCATE_VECTOR_VL_USAT) is missing from this listing.
3091    unsigned ClipOpc = IsSigned ? RISCVISD::TRUNCATE_VECTOR_VL_SSAT
3093    Res = DAG.getNode(ClipOpc, DL, CvtContainerVT, Res, Mask, VL);
3094  }
3095
3096  SDValue SplatZero = DAG.getNode(
3097      RISCVISD::VMV_V_X_VL, DL, DstContainerVT, DAG.getUNDEF(DstContainerVT),
3098      DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
3099  Res = DAG.getNode(RISCVISD::VMERGE_VL, DL, DstContainerVT, IsNan, SplatZero,
3100                    Res, DAG.getUNDEF(DstContainerVT), VL);
3101
3102  if (DstVT.isFixedLengthVector())
3103    Res = convertFromScalableVector(DstVT, Res, DAG, Subtarget);
3104
3105  return Res;
3106}
3107
// NOTE(review): the first signature line of this static helper is missing
// from this listing; per the body it promotes an operation's f16 (without
// Zfh/Zhinx) or bf16 SOURCE operand to f32 before performing the op —
// confirm the exact name against the original source.
3109                                 const RISCVSubtarget &Subtarget) {
3110  bool IsStrict = Op->isStrictFPOpcode();
// For strict ops operand 0 is the chain, so the value is operand 1.
3111  SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
3112
3113  // f16 conversions are promoted to f32 when Zfh/Zhinx is not enabled.
3114  // bf16 conversions are always promoted to f32.
3115  if ((SrcVal.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
3116      SrcVal.getValueType() == MVT::bf16) {
3117    SDLoc DL(Op);
3118    if (IsStrict) {
3119      SDValue Ext =
3120          DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
3121                      {Op.getOperand(0), SrcVal});
3122      return DAG.getNode(Op.getOpcode(), DL, {Op.getValueType(), MVT::Other},
3123                         {Ext.getValue(1), Ext.getValue(0)});
3124    }
3125    return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
3126                       DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, SrcVal));
3127  }
3128
3129  // Other operations are legal.
3130  return Op;
3131}
3132
// Map an ISD rounding-style FP opcode to the static RISC-V FP rounding mode
// (frm) it requires.
// NOTE(review): doxygen rendering — the signature line 3133 (matchRoundingOp),
// one case label at line 3136 (presumably ISD::STRICT_FROUNDEVEN — TODO
// confirm against upstream), and the fall-through return at line 3159 were
// elided by the scrape.
3134 switch (Opc) {
3135 case ISD::FROUNDEVEN:
3137 case ISD::VP_FROUNDEVEN:
// Round to nearest, ties to even.
3138 return RISCVFPRndMode::RNE;
3139 case ISD::FTRUNC:
3140 case ISD::STRICT_FTRUNC:
3141 case ISD::VP_FROUNDTOZERO:
// Round toward zero.
3142 return RISCVFPRndMode::RTZ;
3143 case ISD::FFLOOR:
3144 case ISD::STRICT_FFLOOR:
3145 case ISD::VP_FFLOOR:
// Round down (toward -inf).
3146 return RISCVFPRndMode::RDN;
3147 case ISD::FCEIL:
3148 case ISD::STRICT_FCEIL:
3149 case ISD::VP_FCEIL:
// Round up (toward +inf).
3150 return RISCVFPRndMode::RUP;
3151 case ISD::FROUND:
3152 case ISD::STRICT_FROUND:
3153 case ISD::VP_FROUND:
// Round to nearest, ties to max magnitude.
3154 return RISCVFPRndMode::RMM;
3155 case ISD::FRINT:
// FRINT uses the dynamic rounding mode currently in frm.
3156 return RISCVFPRndMode::DYN;
3157 }
3158
3160}
3161
3162// Expand vector FTRUNC, FCEIL, FFLOOR, FROUND, VP_FCEIL, VP_FFLOOR, VP_FROUND
3163// VP_FROUNDEVEN, VP_FROUNDTOZERO, VP_FRINT and VP_FNEARBYINT by converting to
3164// the integer domain and back. Taking care to avoid converting values that are
3165// nan or already correct.
// NOTE(review): doxygen rendering — line 3167 (function name, presumably
// lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND, plus the Op/DAG parameters) and
// lines 3236-3237 (computing FRM, presumably via matchRoundingOp) were elided
// by the scrape.
3166static SDValue
3168 const RISCVSubtarget &Subtarget) {
3169 MVT VT = Op.getSimpleValueType();
3170 assert(VT.isVector() && "Unexpected type");
3171
3172 SDLoc DL(Op);
3173
3174 SDValue Src = Op.getOperand(0);
3175
// Fixed-length vectors are operated on in an equivalent scalable container
// type and converted back at the end.
3176 MVT ContainerVT = VT;
3177 if (VT.isFixedLengthVector()) {
3178 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3179 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3180 }
3181
// VP ops carry an explicit mask (operand 1) and EVL (operand 2); non-VP ops
// use the default all-ones mask and full VL.
3182 SDValue Mask, VL;
3183 if (Op->isVPOpcode()) {
3184 Mask = Op.getOperand(1);
3185 if (VT.isFixedLengthVector())
3186 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
3187 Subtarget);
3188 VL = Op.getOperand(2);
3189 } else {
3190 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3191 }
3192
3193 // Freeze the source since we are increasing the number of uses.
3194 Src = DAG.getFreeze(Src);
3195
3196 // We do the conversion on the absolute value and fix the sign at the end.
3197 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
3198
3199 // Determine the largest integer that can be represented exactly. This and
3200 // values larger than it don't have any fractional bits so don't need to
3201 // be converted.
// 2^(precision-1) built exactly from an APInt with only that bit set.
3202 const fltSemantics &FltSem = ContainerVT.getFltSemantics();
3203 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3204 APFloat MaxVal = APFloat(FltSem);
3205 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3206 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3207 SDValue MaxValNode =
3208 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
3209 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
3210 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
3211
3212 // If abs(Src) was larger than MaxVal or nan, keep it.
// SETOLT is false for NaN, so NaN lanes (and large-magnitude lanes) drop out
// of the conversion mask and retain their original value via the final
// FCOPYSIGN/merge below.
3213 MVT SetccVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
3214 Mask =
3215 DAG.getNode(RISCVISD::SETCC_VL, DL, SetccVT,
3216 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT),
3217 Mask, Mask, VL});
3218
3219 // Truncate to integer and convert back to FP.
3220 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
3221 MVT XLenVT = Subtarget.getXLenVT();
3222 SDValue Truncated;
3223
3224 switch (Op.getOpcode()) {
3225 default:
3226 llvm_unreachable("Unexpected opcode");
3227 case ISD::FCEIL:
3228 case ISD::VP_FCEIL:
3229 case ISD::FFLOOR:
3230 case ISD::VP_FFLOOR:
3231 case ISD::FROUND:
3232 case ISD::FROUNDEVEN:
3233 case ISD::VP_FROUND:
3234 case ISD::VP_FROUNDEVEN:
3235 case ISD::VP_FROUNDTOZERO: {
// These opcodes use a statically-encoded rounding mode (FRM, computed on the
// elided lines above) via vfcvt with an explicit rounding-mode operand.
3238 Truncated = DAG.getNode(RISCVISD::VFCVT_RM_X_F_VL, DL, IntVT, Src, Mask,
3239 DAG.getTargetConstant(FRM, DL, XLenVT), VL);
3240 break;
3241 }
3242 case ISD::FTRUNC:
// Truncation always rounds toward zero; use the dedicated RTZ conversion.
3243 Truncated = DAG.getNode(RISCVISD::VFCVT_RTZ_X_F_VL, DL, IntVT, Src,
3244 Mask, VL);
3245 break;
3246 case ISD::FRINT:
3247 case ISD::VP_FRINT:
// FRINT honors the dynamic rounding mode, so no static mode operand.
3248 Truncated = DAG.getNode(RISCVISD::VFCVT_X_F_VL, DL, IntVT, Src, Mask, VL);
3249 break;
3250 case ISD::FNEARBYINT:
3251 case ISD::VP_FNEARBYINT:
// FNEARBYINT must not raise the inexact flag; this node also produces an FP
// result directly (see the check below).
3252 Truncated = DAG.getNode(RISCVISD::VFROUND_NOEXCEPT_VL, DL, ContainerVT, Src,
3253 Mask, VL);
3254 break;
3255 }
3256
3257 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3258 if (Truncated.getOpcode() != RISCVISD::VFROUND_NOEXCEPT_VL)
3259 Truncated = DAG.getNode(RISCVISD::SINT_TO_FP_VL, DL, ContainerVT, Truncated,
3260 Mask, VL);
3261
3262 // Restore the original sign so that -0.0 is preserved.
3263 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
3264 Src, Src, Mask, VL);
3265
3266 if (!VT.isFixedLengthVector())
3267 return Truncated;
3268
3269 return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3270}
3271
3272// Expand vector STRICT_FTRUNC, STRICT_FCEIL, STRICT_FFLOOR, STRICT_FROUND
3273// STRICT_FROUNDEVEN and STRICT_FNEARBYINT by converting sNan of the source to
3274// qNan and coverting the new source to integer and back to FP.
// NOTE(review): doxygen rendering — several hyperlinked lines were elided by
// the scrape: 3276 (the function name, presumably
// lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND, and the Op/DAG parameters),
// 3296/3301 (the STRICT_FSETCCS_VL and strict VFADD/merge node creations that
// define Unorder and the new Src), 3338-3340 (the STRICT_FROUNDEVEN case plus
// the FRM computation), 3348 (the STRICT_VFCVT_RTZ node opcode), and
// 3351-3352 (the STRICT_FNEARBYINT case using STRICT_VFROUND_NOEXCEPT_VL —
// TODO confirm against upstream).
3275static SDValue
3277 const RISCVSubtarget &Subtarget) {
3278 SDLoc DL(Op);
3279 MVT VT = Op.getSimpleValueType();
// Strict ops: operand 0 is the chain, operand 1 the vector source.
3280 SDValue Chain = Op.getOperand(0);
3281 SDValue Src = Op.getOperand(1);
3282
3283 MVT ContainerVT = VT;
3284 if (VT.isFixedLengthVector()) {
3285 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3286 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3287 }
3288
3289 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3290
3291 // Freeze the source since we are increasing the number of uses.
3292 Src = DAG.getFreeze(Src);
3293
3294 // Covert sNan to qNan by executing x + x for all unordered elemenet x in Src.
3295 MVT MaskVT = Mask.getSimpleValueType();
3297 DAG.getVTList(MaskVT, MVT::Other),
3298 {Chain, Src, Src, DAG.getCondCode(ISD::SETUNE),
3299 DAG.getUNDEF(MaskVT), Mask, VL});
3300 Chain = Unorder.getValue(1);
3302 DAG.getVTList(ContainerVT, MVT::Other),
3303 {Chain, Src, Src, Src, Unorder, VL});
3304 Chain = Src.getValue(1);
3305
3306 // We do the conversion on the absolute value and fix the sign at the end.
3307 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
3308
3309 // Determine the largest integer that can be represented exactly. This and
3310 // values larger than it don't have any fractional bits so don't need to
3311 // be converted.
// 2^(precision-1), built exactly from a single-bit APInt.
3312 const fltSemantics &FltSem = ContainerVT.getFltSemantics();
3313 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3314 APFloat MaxVal = APFloat(FltSem);
3315 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3316 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3317 SDValue MaxValNode =
3318 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
3319 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
3320 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
3321
3322 // If abs(Src) was larger than MaxVal or nan, keep it.
// SETOLT excludes NaN lanes and already-integral large values from conversion.
3323 Mask = DAG.getNode(
3324 RISCVISD::SETCC_VL, DL, MaskVT,
3325 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT), Mask, Mask, VL});
3326
3327 // Truncate to integer and convert back to FP.
3328 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
3329 MVT XLenVT = Subtarget.getXLenVT();
3330 SDValue Truncated;
3331
3332 switch (Op.getOpcode()) {
3333 default:
3334 llvm_unreachable("Unexpected opcode");
3335 case ISD::STRICT_FCEIL:
3336 case ISD::STRICT_FFLOOR:
3337 case ISD::STRICT_FROUND:
// Static rounding mode (FRM, computed on the elided lines above) with the
// chain threaded through the strict conversion node.
3341 Truncated = DAG.getNode(
3342 RISCVISD::STRICT_VFCVT_RM_X_F_VL, DL, DAG.getVTList(IntVT, MVT::Other),
3343 {Chain, Src, Mask, DAG.getTargetConstant(FRM, DL, XLenVT), VL});
3344 break;
3345 }
3346 case ISD::STRICT_FTRUNC:
3347 Truncated =
3349 DAG.getVTList(IntVT, MVT::Other), Chain, Src, Mask, VL);
3350 break;
3353 DAG.getVTList(ContainerVT, MVT::Other), Chain, Src,
3354 Mask, VL);
3355 break;
3356 }
3357 Chain = Truncated.getValue(1);
3358
3359 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3360 if (Op.getOpcode() != ISD::STRICT_FNEARBYINT) {
3361 Truncated = DAG.getNode(RISCVISD::STRICT_SINT_TO_FP_VL, DL,
3362 DAG.getVTList(ContainerVT, MVT::Other), Chain,
3363 Truncated, Mask, VL);
3364 Chain = Truncated.getValue(1);
3365 }
3366
3367 // Restore the original sign so that -0.0 is preserved.
3368 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
3369 Src, Src, Mask, VL);
3370
3371 if (VT.isFixedLengthVector())
3372 Truncated = convertFromScalableVector(VT, Truncated, DAG, Subtarget);
// Return both the rounded value and the updated chain.
3373 return DAG.getMergeValues({Truncated, Chain}, DL);
3374}
3375
// Lower scalar FTRUNC/FCEIL/FFLOOR/FROUND-style nodes. Vectors are dispatched
// to lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND; scalars are lowered to the target
// RISCVISD::FROUND node carrying the exact-integer threshold and a static
// rounding mode.
// NOTE(review): doxygen rendering — line 3377 (the function name and Op/DAG
// parameters) and line 3399 (computing FRM, presumably via matchRoundingOp)
// were elided by the scrape.
3376static SDValue
3378 const RISCVSubtarget &Subtarget) {
3379 MVT VT = Op.getSimpleValueType();
3380 if (VT.isVector())
3381 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
3382
// The scalar expansion materializes extra instructions; bail out under
// optsize and let the default legalization handle it.
3383 if (DAG.shouldOptForSize())
3384 return SDValue();
3385
3386 SDLoc DL(Op);
3387 SDValue Src = Op.getOperand(0);
3388
3389 // Create an integer the size of the mantissa with the MSB set. This and all
3390 // values larger than it don't have any fractional bits so don't need to be
3391 // converted.
3392 const fltSemantics &FltSem = VT.getFltSemantics();
3393 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3394 APFloat MaxVal = APFloat(FltSem);
3395 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3396 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3397 SDValue MaxValNode = DAG.getConstantFP(MaxVal, DL, VT);
3398
3400 return DAG.getNode(RISCVISD::FROUND, DL, VT, Src, MaxValNode,
3401 DAG.getTargetConstant(FRM, DL, Subtarget.getXLenVT()));
3402}
3403
3404// Expand vector LRINT and LLRINT by converting to the integer domain.
// NOTE(review): doxygen rendering — line 3405 (the function name, presumably
// lowerVectorXRINT, and the Op/DAG parameters) was elided by the scrape.
// Uses VFCVT_X_F_VL, which honors the dynamic rounding mode, with a
// same-element-count integer result type (ContainerVT here).
3406 const RISCVSubtarget &Subtarget) {
3407 MVT VT = Op.getSimpleValueType();
3408 assert(VT.isVector() && "Unexpected type");
3409
3410 SDLoc DL(Op);
3411 SDValue Src = Op.getOperand(0);
3412 MVT ContainerVT = VT;
3413
// Fixed-length vectors are processed in an equivalent scalable container.
3414 if (VT.isFixedLengthVector()) {
3415 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3416 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3417 }
3418
3419 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3420 SDValue Truncated =
3421 DAG.getNode(RISCVISD::VFCVT_X_F_VL, DL, ContainerVT, Src, Mask, VL);
3422
3423 if (!VT.isFixedLengthVector())
3424 return Truncated;
3425
3426 return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3427}
3428
// Build a RISCVISD::VSLIDEDOWN_VL node (vslidedown by Offset into Passthru).
// NOTE(review): doxygen rendering — line 3430 (the function name, presumably
// getVSlidedown, plus DAG/Subtarget parameters), line 3433 (the trailing
// policy parameter and its default), and line 3435 (the policy adjustment
// applied when Passthru is undef) were elided by the scrape.
3429static SDValue
3431 const SDLoc &DL, EVT VT, SDValue Passthru, SDValue Op,
3432 SDValue Offset, SDValue Mask, SDValue VL,
3434 if (Passthru.isUndef())
// The vslidedown operands follow the standard VL-node layout, with the
// tail/mask policy encoded as a target constant of XLEN width.
3436 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3437 SDValue Ops[] = {Passthru, Op, Offset, Mask, VL, PolicyOp};
3438 return DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, VT, Ops);
3439}
3440
// Build a RISCVISD::VSLIDEUP_VL node (vslideup by Offset into Passthru).
// NOTE(review): doxygen rendering — line 3445 (the trailing policy parameter
// and its default) and line 3447 (the policy adjustment applied when Passthru
// is undef) were elided by the scrape.
3441static SDValue
3442getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL,
3443 EVT VT, SDValue Passthru, SDValue Op, SDValue Offset, SDValue Mask,
3444 SDValue VL,
3446 if (Passthru.isUndef())
// Same operand layout as getVSlidedown; policy travels as a target constant.
3448 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3449 SDValue Ops[] = {Passthru, Op, Offset, Mask, VL, PolicyOp};
3450 return DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, VT, Ops);
3451}
3452
// Return the scalable vector type with the same element type as VT whose size
// is exactly one vector register (LMUL=1).
// NOTE(review): doxygen rendering — the assert condition (line 3454) and the
// body computing the LMUL1 type (lines 3456-3458) were elided by the scrape;
// only the assert message and closing brace are visible here.
3453static MVT getLMUL1VT(MVT VT) {
3455 "Unexpected vector MVT");
3459}
3460
// NOTE(review): doxygen rendering — the opening of this struct (lines
// 3461-3463, presumably "struct VIDSequence" with the StepNumerator and
// StepDenominator members used by isSimpleVIDSequence below) was elided by
// the scrape. Addend is the constant offset added to (VID * Step) when
// rebuilding the sequence.
3464 int64_t Addend;
3465};
3466
// Convert APF to a signed integer of the (elided) requested bit width,
// returning std::nullopt unless the value is an exactly-representable
// in-range integer.
// NOTE(review): doxygen rendering — line 3468 (the second parameter,
// presumably "unsigned BitWidth"), line 3476 (the ArbitraryRM rounding-mode
// definition), and line 3481 (the APFloat::opInvalidOp comparison completing
// the condition) were elided by the scrape.
3467static std::optional<APInt> getExactInteger(const APFloat &APF,
3469 // We will use a SINT_TO_FP to materialize this constant so we should use a
3470 // signed APSInt here.
3471 APSInt ValInt(BitWidth, /*IsUnsigned*/ false);
3472 // We use an arbitrary rounding mode here. If a floating-point is an exact
3473 // integer (e.g., 1.0), the rounding mode does not affect the output value. If
3474 // the rounding mode changes the output value, then it is not an exact
3475 // integer.
3477 bool IsExact;
3478 // If it is out of signed integer range, it will return an invalid operation.
3479 // If it is not an exact integer, IsExact is false.
3480 if ((APF.convertToInteger(ValInt, ArbitraryRM, &IsExact) ==
3482 !IsExact)
3483 return std::nullopt;
3484 return ValInt.extractBits(BitWidth, 0);
3485}
3486
3487// Try to match an arithmetic-sequence BUILD_VECTOR [X,X+S,X+2*S,...,X+(N-1)*S]
3488// to the (non-zero) step S and start value X. This can be then lowered as the
3489// RVV sequence (VID * S) + X, for example.
3490// The step S is represented as an integer numerator divided by a positive
3491// denominator. Note that the implementation currently only identifies
3492// sequences in which either the numerator is +/- 1 or the denominator is 1. It
3493// cannot detect 2/3, for example.
3494// Note that this method will also match potentially unappealing index
3495// sequences, like <i32 0, i32 50939494>, however it is left to the caller to
3496// determine whether this is worth generating code for.
3497//
3498// EltSizeInBits is the size of the type that the sequence will be calculated
3499// in, i.e. SEW for build_vectors or XLEN for address calculations.
// NOTE(review): doxygen rendering — line 3503 (an early-exit guard whose
// condition is not visible here) and line 3515 (the declaration of the Elts
// container of optional element values) were elided by the scrape.
3500static std::optional<VIDSequence> isSimpleVIDSequence(SDValue Op,
3501 unsigned EltSizeInBits) {
3502 assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR");
3504 return std::nullopt;
3505 bool IsInteger = Op.getValueType().isInteger();
3506
3507 std::optional<unsigned> SeqStepDenom;
3508 std::optional<APInt> SeqStepNum;
3509 std::optional<APInt> SeqAddend;
// PrevElt remembers the last non-undef value and its index, so steps can be
// interpolated across undef gaps.
3510 std::optional<std::pair<APInt, unsigned>> PrevElt;
3511 assert(EltSizeInBits >= Op.getValueType().getScalarSizeInBits());
3512
3513 // First extract the ops into a list of constant integer values. This may not
3514 // be possible for floats if they're not all representable as integers.
3516 const unsigned OpSize = Op.getScalarValueSizeInBits();
3517 for (auto [Idx, Elt] : enumerate(Op->op_values())) {
3518 if (Elt.isUndef()) {
3519 Elts[Idx] = std::nullopt;
3520 continue;
3521 }
3522 if (IsInteger) {
// Truncate to the source element width, then zero-extend into the width the
// sequence is computed in.
3523 Elts[Idx] = Elt->getAsAPIntVal().trunc(OpSize).zext(EltSizeInBits);
3524 } else {
3525 auto ExactInteger =
3526 getExactInteger(cast<ConstantFPSDNode>(Elt)->getValueAPF(), OpSize);
3527 if (!ExactInteger)
3528 return std::nullopt;
3529 Elts[Idx] = *ExactInteger;
3530 }
3531 }
3532
3533 for (auto [Idx, Elt] : enumerate(Elts)) {
3534 // Assume undef elements match the sequence; we just have to be careful
3535 // when interpolating across them.
3536 if (!Elt)
3537 continue;
3538
3539 if (PrevElt) {
3540 // Calculate the step since the last non-undef element, and ensure
3541 // it's consistent across the entire sequence.
3542 unsigned IdxDiff = Idx - PrevElt->second;
3543 APInt ValDiff = *Elt - PrevElt->first;
3544
3545 // A zero-value value difference means that we're somewhere in the middle
3546 // of a fractional step, e.g. <0,0,0*,0,1,1,1,1>. Wait until we notice a
3547 // step change before evaluating the sequence.
3548 if (ValDiff == 0)
3549 continue;
3550
3551 int64_t Remainder = ValDiff.srem(IdxDiff);
3552 // Normalize the step if it's greater than 1.
3553 if (Remainder != ValDiff.getSExtValue()) {
3554 // The difference must cleanly divide the element span.
3555 if (Remainder != 0)
3556 return std::nullopt;
3557 ValDiff = ValDiff.sdiv(IdxDiff);
3558 IdxDiff = 1;
3559 }
3560
// Every observed step must agree with the first recorded numerator and
// denominator, otherwise this is not a single arithmetic sequence.
3561 if (!SeqStepNum)
3562 SeqStepNum = ValDiff;
3563 else if (ValDiff != SeqStepNum)
3564 return std::nullopt;
3565
3566 if (!SeqStepDenom)
3567 SeqStepDenom = IdxDiff;
3568 else if (IdxDiff != *SeqStepDenom)
3569 return std::nullopt;
3570 }
3571
3572 // Record this non-undef element for later.
3573 if (!PrevElt || PrevElt->first != *Elt)
3574 PrevElt = std::make_pair(*Elt, Idx);
3575 }
3576
3577 // We need to have logged a step for this to count as a legal index sequence.
3578 if (!SeqStepNum || !SeqStepDenom)
3579 return std::nullopt;
3580
3581 // Loop back through the sequence and validate elements we might have skipped
3582 // while waiting for a valid step. While doing this, log any sequence addend.
3583 for (auto [Idx, Elt] : enumerate(Elts)) {
3584 if (!Elt)
3585 continue;
// Expected value at Idx is floor(Idx * Num / Denom); the addend is whatever
// uniform offset remains, and it must match across all defined elements.
3586 APInt ExpectedVal =
3587 (APInt(EltSizeInBits, Idx) * *SeqStepNum).sdiv(*SeqStepDenom);
3588
3589 APInt Addend = *Elt - ExpectedVal;
3590 if (!SeqAddend)
3591 SeqAddend = Addend;
3592 else if (Addend != SeqAddend)
3593 return std::nullopt;
3594 }
3595
3596 assert(SeqAddend && "Must have an addend if we have a step");
3597
3598 return VIDSequence{SeqStepNum->getSExtValue(), *SeqStepDenom,
3599 SeqAddend->getSExtValue()};
3600}
3601
3602// Match a splatted value (SPLAT_VECTOR/BUILD_VECTOR) of an EXTRACT_VECTOR_ELT
3603// and lower it as a VRGATHER_VX_VL from the source vector.
// Returns SDValue() (no match) unless the splatted scalar is an element
// extracted from a same-typed, non-i1 vector with an XLEN-typed index.
3604static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL,
3605 SelectionDAG &DAG,
3606 const RISCVSubtarget &Subtarget) {
3607 if (SplatVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
3608 return SDValue();
3609 SDValue Vec = SplatVal.getOperand(0);
3610 // Only perform this optimization on vectors of the same size for simplicity.
3611 // Don't perform this optimization for i1 vectors.
3612 // FIXME: Support i1 vectors, maybe by promoting to i8?
3613 if (Vec.getValueType() != VT || VT.getVectorElementType() == MVT::i1)
3614 return SDValue();
3615 SDValue Idx = SplatVal.getOperand(1);
3616 // The index must be a legal type.
3617 if (Idx.getValueType() != Subtarget.getXLenVT())
3618 return SDValue();
3619
// Fixed-length vectors are gathered in an equivalent scalable container.
3620 MVT ContainerVT = VT;
3621 if (VT.isFixedLengthVector()) {
3622 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3623 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
3624 }
3625
3626 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3627
// vrgather.vx with a scalar index broadcasts element Idx of Vec to all lanes.
3628 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, Vec,
3629 Idx, DAG.getUNDEF(ContainerVT), Mask, VL);
3630
3631 if (!VT.isFixedLengthVector())
3632 return Gather;
3633
3634 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
3635}
3636
3637
3638/// Try and optimize BUILD_VECTORs with "dominant values" - these are values
3639/// which constitute a large proportion of the elements. In such cases we can
3640/// splat a vector with the dominant element and make up the shortfall with
3641/// INSERT_VECTOR_ELTs. Returns SDValue if not profitable.
3642/// Note that this includes vectors of 2 elements by association. The
3643/// upper-most element is the "dominant" one, allowing us to use a splat to
3644/// "insert" the upper element, and an insert of the lower element at position
3645/// 0, which improves codegen.
// NOTE(review): doxygen rendering — line 3646 (the function name, presumably
// lowerBuildVectorViaDominantValues, plus the Op/DAG parameters), line 3715
// (selecting the FP vs integer slide1down opcode), and line 3736 (the Ops
// SmallVector declaration for the select mask) were elided by the scrape.
3647 const RISCVSubtarget &Subtarget) {
3648 MVT VT = Op.getSimpleValueType();
3649 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3650
3651 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3652
3653 SDLoc DL(Op);
3654 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3655
3656 MVT XLenVT = Subtarget.getXLenVT();
3657 unsigned NumElts = Op.getNumOperands();
3658
// Single pass over the operands to find the most frequent defined value.
3659 SDValue DominantValue;
3660 unsigned MostCommonCount = 0;
3661 DenseMap<SDValue, unsigned> ValueCounts;
3662 unsigned NumUndefElts =
3663 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
3664
3665 // Track the number of scalar loads we know we'd be inserting, estimated as
3666 // any non-zero floating-point constant. Other kinds of element are either
3667 // already in registers or are materialized on demand. The threshold at which
3668 // a vector load is more desirable than several scalar materializion and
3669 // vector-insertion instructions is not known.
3670 unsigned NumScalarLoads = 0;
3671
3672 for (SDValue V : Op->op_values()) {
3673 if (V.isUndef())
3674 continue;
3675
3676 unsigned &Count = ValueCounts[V];
// Only count the scalar-load estimate once per distinct value.
3677 if (0 == Count)
3678 if (auto *CFP = dyn_cast<ConstantFPSDNode>(V))
3679 NumScalarLoads += !CFP->isExactlyValue(+0.0);
3680
3681 // Is this value dominant? In case of a tie, prefer the highest element as
3682 // it's cheaper to insert near the beginning of a vector than it is at the
3683 // end.
3684 if (++Count >= MostCommonCount) {
3685 DominantValue = V;
3686 MostCommonCount = Count;
3687 }
3688 }
3689
3690 assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
3691 unsigned NumDefElts = NumElts - NumUndefElts;
// Dominance threshold: all-but-two of the defined elements (0 for <= 2).
3692 unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;
3693
3694 // Don't perform this optimization when optimizing for size, since
3695 // materializing elements and inserting them tends to cause code bloat.
3696 if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts &&
3697 (NumElts != 2 || ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) &&
3698 ((MostCommonCount > DominantValueCountThreshold) ||
3699 (ValueCounts.size() <= Log2_32(NumDefElts)))) {
3700 // Start by splatting the most common element.
3701 SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue);
3702
3703 DenseSet<SDValue> Processed{DominantValue};
3704
3705 // We can handle an insert into the last element (of a splat) via
3706 // v(f)slide1down. This is slightly better than the vslideup insert
3707 // lowering as it avoids the need for a vector group temporary. It
3708 // is also better than using vmerge.vx as it avoids the need to
3709 // materialize the mask in a vector register.
3710 if (SDValue LastOp = Op->getOperand(Op->getNumOperands() - 1);
3711 !LastOp.isUndef() && ValueCounts[LastOp] == 1 &&
3712 LastOp != DominantValue) {
3713 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
3714 auto OpCode =
// Integer scalars must be widened to XLEN before feeding vslide1down.
3716 if (!VT.isFloatingPoint())
3717 LastOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, LastOp);
3718 Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
3719 LastOp, Mask, VL);
3720 Vec = convertFromScalableVector(VT, Vec, DAG, Subtarget);
3721 Processed.insert(LastOp);
3722 }
3723
3724 MVT SelMaskTy = VT.changeVectorElementType(MVT::i1);
3725 for (const auto &OpIdx : enumerate(Op->ops())) {
3726 const SDValue &V = OpIdx.value();
3727 if (V.isUndef() || !Processed.insert(V).second)
3728 continue;
// A value that appears exactly once is cheapest as a single element insert.
3729 if (ValueCounts[V] == 1) {
3730 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V,
3731 DAG.getVectorIdxConstant(OpIdx.index(), DL));
3732 } else {
3733 // Blend in all instances of this value using a VSELECT, using a
3734 // mask where each bit signals whether that element is the one
3735 // we're after.
3737 transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) {
3738 return DAG.getConstant(V == V1, DL, XLenVT);
3739 });
3740 Vec = DAG.getNode(ISD::VSELECT, DL, VT,
3741 DAG.getBuildVector(SelMaskTy, DL, Ops),
3742 DAG.getSplatBuildVector(VT, DL, V), Vec);
3743 }
3744 }
3745
3746 return Vec;
3747 }
3748
3749 return SDValue();
3750}
3751
3753 const RISCVSubtarget &Subtarget) {
3754 MVT VT = Op.getSimpleValueType();
3755 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3756
3757 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3758
3759 SDLoc DL(Op);
3760 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3761
3762 MVT XLenVT = Subtarget.getXLenVT();
3763 unsigned NumElts = Op.getNumOperands();
3764
3765 if (VT.getVectorElementType() == MVT::i1) {
3766 if (ISD::isBuildVectorAllZeros(Op.getNode())) {
3767 SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL);
3768 return convertFromScalableVector(VT, VMClr, DAG, Subtarget);
3769 }
3770
3771 if (ISD::isBuildVectorAllOnes(Op.getNode())) {
3772 SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
3773 return convertFromScalableVector(VT, VMSet, DAG, Subtarget);
3774 }
3775
3776 // Lower constant mask BUILD_VECTORs via an integer vector type, in
3777 // scalar integer chunks whose bit-width depends on the number of mask
3778 // bits and XLEN.
3779 // First, determine the most appropriate scalar integer type to use. This
3780 // is at most XLenVT, but may be shrunk to a smaller vector element type
3781 // according to the size of the final vector - use i8 chunks rather than
3782 // XLenVT if we're producing a v8i1. This results in more consistent
3783 // codegen across RV32 and RV64.
3784 unsigned NumViaIntegerBits = std::clamp(NumElts, 8u, Subtarget.getXLen());
3785 NumViaIntegerBits = std::min(NumViaIntegerBits, Subtarget.getELen());
3786 // If we have to use more than one INSERT_VECTOR_ELT then this
3787 // optimization is likely to increase code size; avoid peforming it in
3788 // such a case. We can use a load from a constant pool in this case.
3789 if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits)
3790 return SDValue();
3791 // Now we can create our integer vector type. Note that it may be larger
3792 // than the resulting mask type: v4i1 would use v1i8 as its integer type.
3793 unsigned IntegerViaVecElts = divideCeil(NumElts, NumViaIntegerBits);
3794 MVT IntegerViaVecVT =
3795 MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits),
3796 IntegerViaVecElts);
3797
3798 uint64_t Bits = 0;
3799 unsigned BitPos = 0, IntegerEltIdx = 0;
3800 SmallVector<SDValue, 8> Elts(IntegerViaVecElts);
3801
3802 for (unsigned I = 0; I < NumElts;) {
3803 SDValue V = Op.getOperand(I);
3804 bool BitValue = !V.isUndef() && V->getAsZExtVal();
3805 Bits |= ((uint64_t)BitValue << BitPos);
3806 ++BitPos;
3807 ++I;
3808
3809 // Once we accumulate enough bits to fill our scalar type or process the
3810 // last element, insert into our vector and clear our accumulated data.
3811 if (I % NumViaIntegerBits == 0 || I == NumElts) {
3812 if (NumViaIntegerBits <= 32)
3813 Bits = SignExtend64<32>(Bits);
3814 SDValue Elt = DAG.getSignedConstant(Bits, DL, XLenVT);
3815 Elts[IntegerEltIdx] = Elt;
3816 Bits = 0;
3817 BitPos = 0;
3818 IntegerEltIdx++;
3819 }
3820 }
3821
3822 SDValue Vec = DAG.getBuildVector(IntegerViaVecVT, DL, Elts);
3823
3824 if (NumElts < NumViaIntegerBits) {
3825 // If we're producing a smaller vector than our minimum legal integer
3826 // type, bitcast to the equivalent (known-legal) mask type, and extract
3827 // our final mask.
3828 assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
3829 Vec = DAG.getBitcast(MVT::v8i1, Vec);
3830 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Vec,
3831 DAG.getConstant(0, DL, XLenVT));
3832 } else {
3833 // Else we must have produced an integer type with the same size as the
3834 // mask type; bitcast for the final result.
3835 assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits());
3836 Vec = DAG.getBitcast(VT, Vec);
3837 }
3838
3839 return Vec;
3840 }
3841
3843 unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
3845 if (!VT.isFloatingPoint())
3846 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
3847 Splat =
3848 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
3849 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
3850 }
3851
3852 // Try and match index sequences, which we can lower to the vid instruction
3853 // with optional modifications. An all-undef vector is matched by
3854 // getSplatValue, above.
3855 if (auto SimpleVID = isSimpleVIDSequence(Op, Op.getScalarValueSizeInBits())) {
3856 int64_t StepNumerator = SimpleVID->StepNumerator;
3857 unsigned StepDenominator = SimpleVID->StepDenominator;
3858 int64_t Addend = SimpleVID->Addend;
3859
3860 assert(StepNumerator != 0 && "Invalid step");
3861 bool Negate = false;
3862 int64_t SplatStepVal = StepNumerator;
3863 unsigned StepOpcode = ISD::MUL;
3864 // Exclude INT64_MIN to avoid passing it to std::abs. We won't optimize it
3865 // anyway as the shift of 63 won't fit in uimm5.
3866 if (StepNumerator != 1 && StepNumerator != INT64_MIN &&
3867 isPowerOf2_64(std::abs(StepNumerator))) {
3868 Negate = StepNumerator < 0;
3869 StepOpcode = ISD::SHL;
3870 SplatStepVal = Log2_64(std::abs(StepNumerator));
3871 }
3872
3873 // Only emit VIDs with suitably-small steps/addends. We use imm5 is a
3874 // threshold since it's the immediate value many RVV instructions accept.
3875 // There is no vmul.vi instruction so ensure multiply constant can fit in
3876 // a single addi instruction.
3877 if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) ||
3878 (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) &&
3879 isPowerOf2_32(StepDenominator) &&
3880 (SplatStepVal >= 0 || StepDenominator == 1) && isInt<5>(Addend)) {
3881 MVT VIDVT =
3883 MVT VIDContainerVT =
3884 getContainerForFixedLengthVector(DAG, VIDVT, Subtarget);
3885 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VIDContainerVT, Mask, VL);
3886 // Convert right out of the scalable type so we can use standard ISD
3887 // nodes for the rest of the computation. If we used scalable types with
3888 // these, we'd lose the fixed-length vector info and generate worse
3889 // vsetvli code.
3890 VID = convertFromScalableVector(VIDVT, VID, DAG, Subtarget);
3891 if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
3892 (StepOpcode == ISD::SHL && SplatStepVal != 0)) {
3893 SDValue SplatStep = DAG.getSignedConstant(SplatStepVal, DL, VIDVT);
3894 VID = DAG.getNode(StepOpcode, DL, VIDVT, VID, SplatStep);
3895 }
3896 if (StepDenominator != 1) {
3897 SDValue SplatStep =
3898 DAG.getConstant(Log2_64(StepDenominator), DL, VIDVT);
3899 VID = DAG.getNode(ISD::SRL, DL, VIDVT, VID, SplatStep);
3900 }
3901 if (Addend != 0 || Negate) {
3902 SDValue SplatAddend = DAG.getSignedConstant(Addend, DL, VIDVT);
3903 VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VIDVT, SplatAddend,
3904 VID);
3905 }
3906 if (VT.isFloatingPoint()) {
3907 // TODO: Use vfwcvt to reduce register pressure.
3908 VID = DAG.getNode(ISD::SINT_TO_FP, DL, VT, VID);
3909 }
3910 return VID;
3911 }
3912 }
3913
3914 // For very small build_vectors, use a single scalar insert of a constant.
3915 // TODO: Base this on constant rematerialization cost, not size.
3916 const unsigned EltBitSize = VT.getScalarSizeInBits();
3917 if (VT.getSizeInBits() <= 32 &&
3919 MVT ViaIntVT = MVT::getIntegerVT(VT.getSizeInBits());
3920 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32) &&
3921 "Unexpected sequence type");
3922 // If we can use the original VL with the modified element type, this
3923 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
3924 // be moved into InsertVSETVLI?
3925 unsigned ViaVecLen =
3926 (Subtarget.getRealMinVLen() >= VT.getSizeInBits() * NumElts) ? NumElts : 1;
3927 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
3928
3929 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
3930 uint64_t SplatValue = 0;
3931 // Construct the amalgamated value at this larger vector type.
3932 for (const auto &OpIdx : enumerate(Op->op_values())) {
3933 const auto &SeqV = OpIdx.value();
3934 if (!SeqV.isUndef())
3935 SplatValue |=
3936 ((SeqV->getAsZExtVal() & EltMask) << (OpIdx.index() * EltBitSize));
3937 }
3938
3939 // On RV64, sign-extend from 32 to 64 bits where possible in order to
3940 // achieve better constant materializion.
3941 // On RV32, we need to sign-extend to use getSignedConstant.
3942 if (ViaIntVT == MVT::i32)
3943 SplatValue = SignExtend64<32>(SplatValue);
3944
3945 SDValue Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ViaVecVT,
3946 DAG.getUNDEF(ViaVecVT),
3947 DAG.getSignedConstant(SplatValue, DL, XLenVT),
3948 DAG.getVectorIdxConstant(0, DL));
3949 if (ViaVecLen != 1)
3951 MVT::getVectorVT(ViaIntVT, 1), Vec,
3952 DAG.getConstant(0, DL, XLenVT));
3953 return DAG.getBitcast(VT, Vec);
3954 }
3955
3956
3957 // Attempt to detect "hidden" splats, which only reveal themselves as splats
3958 // when re-interpreted as a vector with a larger element type. For example,
3959 // v4i16 = build_vector i16 0, i16 1, i16 0, i16 1
3960 // could be instead splat as
3961 // v2i32 = build_vector i32 0x00010000, i32 0x00010000
3962 // TODO: This optimization could also work on non-constant splats, but it
3963 // would require bit-manipulation instructions to construct the splat value.
3964 SmallVector<SDValue> Sequence;
3965 const auto *BV = cast<BuildVectorSDNode>(Op);
3966 if (VT.isInteger() && EltBitSize < Subtarget.getELen() &&
3968 BV->getRepeatedSequence(Sequence) &&
3969 (Sequence.size() * EltBitSize) <= Subtarget.getELen()) {
3970 unsigned SeqLen = Sequence.size();
3971 MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen);
3972 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 ||
3973 ViaIntVT == MVT::i64) &&
3974 "Unexpected sequence type");
3975
3976 // If we can use the original VL with the modified element type, this
3977 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
3978 // be moved into InsertVSETVLI?
3979 const unsigned RequiredVL = NumElts / SeqLen;
3980 const unsigned ViaVecLen =
3981 (Subtarget.getRealMinVLen() >= ViaIntVT.getSizeInBits() * NumElts) ?
3982 NumElts : RequiredVL;
3983 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
3984
3985 unsigned EltIdx = 0;
3986 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
3987 uint64_t SplatValue = 0;
3988 // Construct the amalgamated value which can be splatted as this larger
3989 // vector type.
3990 for (const auto &SeqV : Sequence) {
3991 if (!SeqV.isUndef())
3992 SplatValue |=
3993 ((SeqV->getAsZExtVal() & EltMask) << (EltIdx * EltBitSize));
3994 EltIdx++;
3995 }
3996
3997 // On RV64, sign-extend from 32 to 64 bits where possible in order to
3998 // achieve better constant materializion.
3999 // On RV32, we need to sign-extend to use getSignedConstant.
4000 if (ViaIntVT == MVT::i32)
4001 SplatValue = SignExtend64<32>(SplatValue);
4002
4003 // Since we can't introduce illegal i64 types at this stage, we can only
4004 // perform an i64 splat on RV32 if it is its own sign-extended value. That
4005 // way we can use RVV instructions to splat.
4006 assert((ViaIntVT.bitsLE(XLenVT) ||
4007 (!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) &&
4008 "Unexpected bitcast sequence");
4009 if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) {
4010 SDValue ViaVL =
4011 DAG.getConstant(ViaVecVT.getVectorNumElements(), DL, XLenVT);
4012 MVT ViaContainerVT =
4013 getContainerForFixedLengthVector(DAG, ViaVecVT, Subtarget);
4014 SDValue Splat =
4015 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ViaContainerVT,
4016 DAG.getUNDEF(ViaContainerVT),
4017 DAG.getSignedConstant(SplatValue, DL, XLenVT), ViaVL);
4018 Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget);
4019 if (ViaVecLen != RequiredVL)
4021 MVT::getVectorVT(ViaIntVT, RequiredVL), Splat,
4022 DAG.getConstant(0, DL, XLenVT));
4023 return DAG.getBitcast(VT, Splat);
4024 }
4025 }
4026
4027 // If the number of signbits allows, see if we can lower as a <N x i8>.
4028 // Our main goal here is to reduce LMUL (and thus work) required to
4029 // build the constant, but we will also narrow if the resulting
4030 // narrow vector is known to materialize cheaply.
4031 // TODO: We really should be costing the smaller vector. There are
4032 // profitable cases this misses.
4033 if (EltBitSize > 8 && VT.isInteger() &&
4034 (NumElts <= 4 || VT.getSizeInBits() > Subtarget.getRealMinVLen()) &&
4035 DAG.ComputeMaxSignificantBits(Op) <= 8) {
4036 SDValue Source = DAG.getBuildVector(VT.changeVectorElementType(MVT::i8),
4037 DL, Op->ops());
4038 Source = convertToScalableVector(ContainerVT.changeVectorElementType(MVT::i8),
4039 Source, DAG, Subtarget);
4040 SDValue Res = DAG.getNode(RISCVISD::VSEXT_VL, DL, ContainerVT, Source, Mask, VL);
4041 return convertFromScalableVector(VT, Res, DAG, Subtarget);
4042 }
4043
4044 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
4045 return Res;
4046
4047 // For constant vectors, use generic constant pool lowering. Otherwise,
4048 // we'd have to materialize constants in GPRs just to move them into the
4049 // vector.
4050 return SDValue();
4051}
4052
4053static unsigned getPACKOpcode(unsigned DestBW,
4054 const RISCVSubtarget &Subtarget) {
4055 switch (DestBW) {
4056 default:
4057 llvm_unreachable("Unsupported pack size");
4058 case 16:
4059 return RISCV::PACKH;
4060 case 32:
4061 return Subtarget.is64Bit() ? RISCV::PACKW : RISCV::PACK;
4062 case 64:
4063 assert(Subtarget.is64Bit());
4064 return RISCV::PACK;
4065 }
4066}
4067
4068/// Double the element size of the build vector to reduce the number
4069/// of vslide1down in the build vector chain. In the worst case, this
4070/// trades three scalar operations for 1 vector operation. Scalar
4071/// operations are generally lower latency, and for out-of-order cores
4072/// we also benefit from additional parallelism.
4074 const RISCVSubtarget &Subtarget) {
4075 SDLoc DL(Op);
4076 MVT VT = Op.getSimpleValueType();
4077 assert(VT.isFixedLengthVector() && "Unexpected vector!");
4078 MVT ElemVT = VT.getVectorElementType();
// Only integer elements can be combined with the mask/shift/or (or pack)
// sequence below; bail on FP element types.
4079 if (!ElemVT.isInteger())
4080 return SDValue();
4081
4082 // TODO: Relax these architectural restrictions, possibly with costing
4083 // of the actual instructions required.
4084 if (!Subtarget.hasStdExtZbb() || !Subtarget.hasStdExtZba())
4085 return SDValue();
4086
4087 unsigned NumElts = VT.getVectorNumElements();
4088 unsigned ElemSizeInBits = ElemVT.getSizeInBits();
// Doubling the element size must still yield a representable scalar
// (smaller than both ELEN and XLEN), and the elements must pair up evenly.
4089 if (ElemSizeInBits >= std::min(Subtarget.getELen(), Subtarget.getXLen()) ||
4090 NumElts % 2 != 0)
4091 return SDValue();
4092
4093 // Produce [B,A] packed into a type twice as wide. Note that all
4094 // scalars are XLenVT, possibly masked (see below).
4095 MVT XLenVT = Subtarget.getXLenVT();
4096 SDValue Mask = DAG.getConstant(
4097 APInt::getLowBitsSet(XLenVT.getSizeInBits(), ElemSizeInBits), DL, XLenVT);
4098 auto pack = [&](SDValue A, SDValue B) {
4099 // Bias the scheduling of the inserted operations to near the
4100 // definition of the element - this tends to reduce register
4101 // pressure overall.
4102 SDLoc ElemDL(B);
4103 if (Subtarget.hasStdExtZbkb())
4104 // Note that we're relying on the high bits of the result being
4105 // don't care. For PACKW, the result is *sign* extended.
4106 return SDValue(
4107 DAG.getMachineNode(getPACKOpcode(ElemSizeInBits * 2, Subtarget),
4108 ElemDL, XLenVT, A, B),
4109 0);
4110
// Without Zbkb: clear the high garbage bits of both halves, then combine
// A | (B << ElemSizeInBits). The OR is marked disjoint since the masked
// operands cannot overlap after the shift.
4111 A = DAG.getNode(ISD::AND, SDLoc(A), XLenVT, A, Mask);
4112 B = DAG.getNode(ISD::AND, SDLoc(B), XLenVT, B, Mask);
4113 SDValue ShtAmt = DAG.getConstant(ElemSizeInBits, ElemDL, XLenVT);
4114 SDNodeFlags Flags;
4115 Flags.setDisjoint(true);
4116 return DAG.getNode(ISD::OR, ElemDL, XLenVT, A,
4117 DAG.getNode(ISD::SHL, ElemDL, XLenVT, B, ShtAmt), Flags);
4118 };
4119
// Pack adjacent element pairs, build the half-length vector of doubled
// element width, and reinterpret it as the original type.
4120 SmallVector<SDValue> NewOperands;
4121 NewOperands.reserve(NumElts / 2);
4122 for (unsigned i = 0; i < VT.getVectorNumElements(); i += 2)
4123 NewOperands.push_back(pack(Op.getOperand(i), Op.getOperand(i + 1)));
4124 assert(NumElts == NewOperands.size() * 2);
4125 MVT WideVT = MVT::getIntegerVT(ElemSizeInBits * 2);
4126 MVT WideVecVT = MVT::getVectorVT(WideVT, NumElts / 2);
4127 return DAG.getNode(ISD::BITCAST, DL, VT,
4128 DAG.getBuildVector(WideVecVT, DL, NewOperands));
4129}
4130
// Lower a fixed-length BUILD_VECTOR. Dispatches between several strategies
// visible below: integer re-casting for f16/bf16 elements, the dedicated
// constant-vector path, i1 masks via setcc on an i8 vector, scalar splats,
// exact-VLEN per-register splitting, scalar packing, half-splitting with a
// vselect, and finally a cost-budgeted vslide1down chain.
4132 const RISCVSubtarget &Subtarget) {
4133 MVT VT = Op.getSimpleValueType();
4134 assert(VT.isFixedLengthVector() && "Unexpected vector!");
4135
4136 MVT EltVT = VT.getVectorElementType();
4137 MVT XLenVT = Subtarget.getXLenVT();
4138
4139 SDLoc DL(Op);
4140
4141 // Proper support for f16 requires Zvfh. bf16 always requires special
4142 // handling. We need to cast the scalar to integer and create an integer
4143 // build_vector.
4144 if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) || EltVT == MVT::bf16) {
4145 MVT IVT = VT.changeVectorElementType(MVT::i16);
// NewOps collects the per-element integer replacements used to build the
// equivalent integer build_vector.
4147 for (unsigned I = 0, E = Op.getNumOperands(); I != E; ++I) {
4148 SDValue Elem = Op.getOperand(I);
4149 if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) ||
4150 (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin())) {
4151 // Called by LegalizeDAG, we need to use XLenVT operations since we
4152 // can't create illegal types.
4153 if (auto *C = dyn_cast<ConstantFPSDNode>(Elem)) {
4154 // Manually constant fold so the integer build_vector can be lowered
4155 // better. Waiting for DAGCombine will be too late.
4156 APInt V =
4157 C->getValueAPF().bitcastToAPInt().sext(XLenVT.getSizeInBits());
4158 NewOps[I] = DAG.getConstant(V, DL, XLenVT);
4159 } else {
4160 NewOps[I] = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Elem);
4161 }
4162 } else {
4163 // Called by scalar type legalizer, we can use i16.
4164 NewOps[I] = DAG.getBitcast(MVT::i16, Op.getOperand(I));
4165 }
4166 }
4167 SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, DL, IVT, NewOps);
4168 return DAG.getBitcast(VT, Res);
4169 }
4170
// All-constant build_vectors take a dedicated path (splat/dominant-value/
// constant-pool based).
4171 if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
4173 return lowerBuildVectorOfConstants(Op, DAG, Subtarget);
4174
4175 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4176
4177 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4178
4179 if (VT.getVectorElementType() == MVT::i1) {
4180 // A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask
4181 // vector type, we have a legal equivalently-sized i8 type, so we can use
4182 // that.
4183 MVT WideVecVT = VT.changeVectorElementType(MVT::i8);
4184 SDValue VecZero = DAG.getConstant(0, DL, WideVecVT);
4185
4186 SDValue WideVec;
4188 // For a splat, perform a scalar truncate before creating the wider
4189 // vector.
4190 Splat = DAG.getNode(ISD::AND, DL, Splat.getValueType(), Splat,
4191 DAG.getConstant(1, DL, Splat.getValueType()));
4192 WideVec = DAG.getSplatBuildVector(WideVecVT, DL, Splat);
4193 } else {
// Non-splat: build the i8 vector then mask every lane down to bit 0.
4194 SmallVector<SDValue, 8> Ops(Op->op_values());
4195 WideVec = DAG.getBuildVector(WideVecVT, DL, Ops);
4196 SDValue VecOne = DAG.getConstant(1, DL, WideVecVT);
4197 WideVec = DAG.getNode(ISD::AND, DL, WideVecVT, WideVec, VecOne);
4198 }
4199
4200 return DAG.getSetCC(DL, VT, WideVec, VecZero, ISD::SETNE);
4201 }
4202
// Splat of a single (non-constant) scalar: try a gather first, otherwise
// vmv.v.x / vfmv.v.f on the container type.
4204 if (auto Gather = matchSplatAsGather(Splat, VT, DL, DAG, Subtarget))
4205 return Gather;
4206 unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
4208 if (!VT.isFloatingPoint())
4209 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
4210 Splat =
4211 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
4212 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
4213 }
4214
4215 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
4216 return Res;
4217
4218 // If we're compiling for an exact VLEN value, we can split our work per
4219 // register in the register group.
4220 if (const auto VLen = Subtarget.getRealVLen();
4221 VLen && VT.getSizeInBits().getKnownMinValue() > *VLen) {
4222 MVT ElemVT = VT.getVectorElementType();
4223 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
4224 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4225 MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
4226 MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
4227 assert(M1VT == getLMUL1VT(M1VT));
4228
4229 // The following semantically builds up a fixed length concat_vector
4230 // of the component build_vectors. We eagerly lower to scalable and
4231 // insert_subvector here to avoid DAG combining it back to a large
4232 // build_vector.
4233 SmallVector<SDValue> BuildVectorOps(Op->ops());
4234 unsigned NumOpElts = M1VT.getVectorMinNumElements();
4235 SDValue Vec = DAG.getUNDEF(ContainerVT);
4236 for (unsigned i = 0; i < VT.getVectorNumElements(); i += ElemsPerVReg) {
4237 auto OneVRegOfOps = ArrayRef(BuildVectorOps).slice(i, ElemsPerVReg);
4238 SDValue SubBV =
4239 DAG.getNode(ISD::BUILD_VECTOR, DL, OneRegVT, OneVRegOfOps);
4240 SubBV = convertToScalableVector(M1VT, SubBV, DAG, Subtarget);
4241 unsigned InsertIdx = (i / ElemsPerVReg) * NumOpElts;
4242 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, Vec, SubBV,
4243 DAG.getVectorIdxConstant(InsertIdx, DL));
4244 }
4245 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4246 }
4247
4248 // If we're about to resort to vslide1down (or stack usage), pack our
4249 // elements into the widest scalar type we can. This will force a VL/VTYPE
4250 // toggle, but reduces the critical path, the number of vslide1down ops
4251 // required, and possibly enables scalar folds of the values.
4252 if (SDValue Res = lowerBuildVectorViaPacking(Op, DAG, Subtarget))
4253 return Res;
4254
4255 // For m1 vectors, if we have non-undef values in both halves of our vector,
4256 // split the vector into low and high halves, build them separately, then
4257 // use a vselect to combine them. For long vectors, this cuts the critical
4258 // path of the vslide1down sequence in half, and gives us an opportunity
4259 // to special case each half independently. Note that we don't change the
4260 // length of the sub-vectors here, so if both fallback to the generic
4261 // vslide1down path, we should be able to fold the vselect into the final
4262 // vslidedown (for the undef tail) for the first half w/ masking.
4263 unsigned NumElts = VT.getVectorNumElements();
4264 unsigned NumUndefElts =
4265 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
4266 unsigned NumDefElts = NumElts - NumUndefElts;
4267 if (NumDefElts >= 8 && NumDefElts > NumElts / 2 &&
4268 ContainerVT.bitsLE(getLMUL1VT(ContainerVT))) {
4269 SmallVector<SDValue> SubVecAOps, SubVecBOps;
4270 SmallVector<SDValue> MaskVals;
4271 SDValue UndefElem = DAG.getUNDEF(Op->getOperand(0)->getValueType(0));
4272 SubVecAOps.reserve(NumElts);
4273 SubVecBOps.reserve(NumElts);
4274 for (unsigned i = 0; i < NumElts; i++) {
4275 SDValue Elem = Op->getOperand(i);
4276 if (i < NumElts / 2) {
4277 SubVecAOps.push_back(Elem);
4278 SubVecBOps.push_back(UndefElem);
4279 } else {
4280 SubVecAOps.push_back(UndefElem);
4281 SubVecBOps.push_back(Elem);
4282 }
4283 bool SelectMaskVal = (i < NumElts / 2);
4284 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
4285 }
4286 assert(SubVecAOps.size() == NumElts && SubVecBOps.size() == NumElts &&
4287 MaskVals.size() == NumElts);
4288
4289 SDValue SubVecA = DAG.getBuildVector(VT, DL, SubVecAOps);
4290 SDValue SubVecB = DAG.getBuildVector(VT, DL, SubVecBOps);
4291 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
4292 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
4293 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, SubVecA, SubVecB);
4294 }
4295
4296 // Cap the cost at a value linear to the number of elements in the vector.
4297 // The default lowering is to use the stack. The vector store + scalar loads
4298 // is linear in VL. However, at high lmuls vslide1down and vslidedown end up
4299 // being (at least) linear in LMUL. As a result, using the vslidedown
4300 // lowering for every element ends up being VL*LMUL..
4301 // TODO: Should we be directly costing the stack alternative? Doing so might
4302 // give us a more accurate upper bound.
4303 InstructionCost LinearBudget = VT.getVectorNumElements() * 2;
4304
4305 // TODO: unify with TTI getSlideCost.
4306 InstructionCost PerSlideCost = 1;
4307 switch (RISCVTargetLowering::getLMUL(ContainerVT)) {
4308 default: break;
4310 PerSlideCost = 2;
4311 break;
4313 PerSlideCost = 4;
4314 break;
4316 PerSlideCost = 8;
4317 break;
4318 }
4319
4320 // TODO: Should we be using the build instseq then cost + evaluate scheme
4321 // we use for integer constants here?
// Dry run of the slide sequence below: charge one slide per defined element
// plus one per run of undefs, and bail out if we exceed the linear budget.
4322 unsigned UndefCount = 0;
4323 for (const SDValue &V : Op->ops()) {
4324 if (V.isUndef()) {
4325 UndefCount++;
4326 continue;
4327 }
4328 if (UndefCount) {
4329 LinearBudget -= PerSlideCost;
4330 UndefCount = 0;
4331 }
4332 LinearBudget -= PerSlideCost;
4333 }
4334 if (UndefCount) {
4335 LinearBudget -= PerSlideCost;
4336 }
4337
4338 if (LinearBudget < 0)
4339 return SDValue();
4340
4341 assert((!VT.isFloatingPoint() ||
4342 VT.getVectorElementType().getSizeInBits() <= Subtarget.getFLen()) &&
4343 "Illegal type which will result in reserved encoding");
4344
4345 const unsigned Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
4346
// Emit the vslide1down chain, coalescing each run of undef elements into a
// single vslidedown by the run's length.
4347 SDValue Vec;
4348 UndefCount = 0;
4349 for (SDValue V : Op->ops()) {
4350 if (V.isUndef()) {
4351 UndefCount++;
4352 continue;
4353 }
4354
4355 // Start our sequence with a TA splat in the hopes that hardware is able to
4356 // recognize there's no dependency on the prior value of our temporary
4357 // register.
4358 if (!Vec) {
4359 Vec = DAG.getSplatVector(VT, DL, V);
4360 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
4361 UndefCount = 0;
4362 continue;
4363 }
4364
4365 if (UndefCount) {
4366 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
4367 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4368 Vec, Offset, Mask, VL, Policy);
4369 UndefCount = 0;
4370 }
4371 auto OpCode =
4373 if (!VT.isFloatingPoint())
4374 V = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), V);
4375 Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
4376 V, Mask, VL);
4377 }
// Flush a trailing run of undefs so earlier elements land in their lanes.
4378 if (UndefCount) {
4379 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
4380 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4381 Vec, Offset, Mask, VL, Policy);
4382 }
4383 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4384}
4385
// Splat an i64 value, given as its Lo/Hi 32-bit halves, into vector type VT
// with vector length VL on RV32, preferring single vmv.v.x forms when the
// halves allow it and falling back to a stack-based split splat otherwise.
4386 static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
4388 SelectionDAG &DAG) {
4389 if (!Passthru)
4390 Passthru = DAG.getUNDEF(VT);
// Constant Lo/Hi halves: try to match cheaper splat encodings first.
4392 int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
4393 int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
4394 // If Hi constant is all the same sign bit as Lo, lower this as a custom
4395 // node in order to try and match RVV vector/scalar instructions.
4396 if ((LoC >> 31) == HiC)
4397 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4398
4399 // If vl is equal to VLMAX or fits in 4 bits and Hi constant is equal to Lo,
4400 // we could use vmv.v.x whose EEW = 32 to lower it. This allows us to use
4401 // vlmax vsetvli or vsetivli to change the VL.
4402 // FIXME: Support larger constants?
4403 // FIXME: Support non-constant VLs by saturating?
4404 if (LoC == HiC) {
4405 SDValue NewVL;
4406 if (isAllOnesConstant(VL) ||
4407 (isa<RegisterSDNode>(VL) &&
4408 cast<RegisterSDNode>(VL)->getReg() == RISCV::X0))
4409 NewVL = DAG.getRegister(RISCV::X0, MVT::i32);
4410 else if (isa<ConstantSDNode>(VL) && isUInt<4>(VL->getAsZExtVal()))
// Doubling a small VL keeps it encodable in vsetivli while covering the
// i32 view of the i64 vector.
4411 NewVL = DAG.getNode(ISD::ADD, DL, VL.getValueType(), VL, VL);
4412
4413 if (NewVL) {
4414 MVT InterVT =
4415 MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
4416 auto InterVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterVT,
4417 DAG.getUNDEF(InterVT), Lo, NewVL);
4418 return DAG.getNode(ISD::BITCAST, DL, VT, InterVec);
4419 }
4420 }
4421 }
4422
4423 // Detect cases where Hi is (SRA Lo, 31) which means Hi is Lo sign extended.
4424 if (Hi.getOpcode() == ISD::SRA && Hi.getOperand(0) == Lo &&
4425 isa<ConstantSDNode>(Hi.getOperand(1)) &&
4426 Hi.getConstantOperandVal(1) == 31)
4427 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4428
4429 // If the hi bits of the splat are undefined, then it's fine to just splat Lo
4430 // even if it might be sign extended.
4431 if (Hi.isUndef())
4432 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4433
4434 // Fall back to a stack store and stride x0 vector load.
4435 return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, Passthru, Lo,
4436 Hi, VL);
4437}
4438
4439// Called by type legalization to handle splat of i64 on RV32.
4440// FIXME: We can optimize this when the type has sign or zero bits in one
4441// of the halves.
4442static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
4443 SDValue Scalar, SDValue VL,
4444 SelectionDAG &DAG) {
4445 assert(Scalar.getValueType() == MVT::i64 && "Unexpected VT!");
4446 SDValue Lo, Hi;
4447 std::tie(Lo, Hi) = DAG.SplitScalar(Scalar, DL, MVT::i32, MVT::i32);
4448 return splatPartsI64WithVL(DL, VT, Passthru, Lo, Hi, VL, DAG);
4449}
4450
4451// This function lowers a splat of a scalar operand Splat with the vector
4452// length VL. It ensures the final sequence is type legal, which is useful when
4453// lowering a splat after type legalization.
4454static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL,
4455 MVT VT, const SDLoc &DL, SelectionDAG &DAG,
4456 const RISCVSubtarget &Subtarget) {
// Note: "!HasPassthru && !Passthru" reduces to "!Passthru" — HasPassthru
// being false either means Passthru is null or Passthru is undef.
4457 bool HasPassthru = Passthru && !Passthru.isUndef();
4458 if (!HasPassthru && !Passthru)
4459 Passthru = DAG.getUNDEF(VT);
4460
4461 MVT EltVT = VT.getVectorElementType();
4462 MVT XLenVT = Subtarget.getXLenVT();
4463
4464 if (VT.isFloatingPoint()) {
// f16 without Zvfh, and bf16 always, are splatted through the equivalent
// i16 vector type (moving the scalar to integer first), then bitcast back.
4465 if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) ||
4466 EltVT == MVT::bf16) {
4467 if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) ||
4468 (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin()))
4469 Scalar = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Scalar);
4470 else
4471 Scalar = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Scalar);
4472 MVT IVT = VT.changeVectorElementType(MVT::i16);
4473 Passthru = DAG.getNode(ISD::BITCAST, DL, IVT, Passthru);
4474 SDValue Splat =
4475 lowerScalarSplat(Passthru, Scalar, VL, IVT, DL, DAG, Subtarget);
4476 return DAG.getNode(ISD::BITCAST, DL, VT, Splat);
4477 }
4478 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Passthru, Scalar, VL);
4479 }
4480
4481 // Simplest case is that the operand needs to be promoted to XLenVT.
4482 if (Scalar.getValueType().bitsLE(XLenVT)) {
4483 // If the operand is a constant, sign extend to increase our chances
4484 // of being able to use a .vi instruction. ANY_EXTEND would become a
4485 // a zero extend and the simm5 check in isel would fail.
4486 // FIXME: Should we ignore the upper bits in isel instead?
4487 unsigned ExtOpc =
4489 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
4490 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
4491 }
4492
// Past this point we must be splatting an i64 value on RV32.
4493 assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 &&
4494 "Unexpected scalar for splat lowering!");
4495
// Splatting zero into a single lane only needs vmv.s.x with a zero scalar.
4496 if (isOneConstant(VL) && isNullConstant(Scalar))
4497 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru,
4498 DAG.getConstant(0, DL, XLenVT), VL);
4499
4500 // Otherwise use the more complicated splatting algorithm.
4501 return splatSplitI64WithVL(DL, VT, Passthru, Scalar, VL, DAG);
4502}
4503
4504// This function lowers an insert of a scalar operand Scalar into lane
4505// 0 of the vector regardless of the value of VL. The contents of the
4506// remaining lanes of the result vector are unspecified. VL is assumed
4507// to be non-zero.
4509 const SDLoc &DL, SelectionDAG &DAG,
4510 const RISCVSubtarget &Subtarget) {
4511 assert(VT.isScalableVector() && "Expect VT is scalable vector type.");
4512
4513 const MVT XLenVT = Subtarget.getXLenVT();
4514 SDValue Passthru = DAG.getUNDEF(VT);
4515
// If the scalar was itself just extracted from lane 0 of a vector with a
// matching element type, reuse that vector directly via subvector insert/
// extract instead of a scalar move.
4516 if (Scalar.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
4517 isNullConstant(Scalar.getOperand(1))) {
4518 SDValue ExtractedVal = Scalar.getOperand(0);
4519 // The element types must be the same.
4520 if (ExtractedVal.getValueType().getVectorElementType() ==
4521 VT.getVectorElementType()) {
4522 MVT ExtractedVT = ExtractedVal.getSimpleValueType();
4523 MVT ExtractedContainerVT = ExtractedVT;
4524 if (ExtractedContainerVT.isFixedLengthVector()) {
4525 ExtractedContainerVT = getContainerForFixedLengthVector(
4526 DAG, ExtractedContainerVT, Subtarget);
4527 ExtractedVal = convertToScalableVector(ExtractedContainerVT,
4528 ExtractedVal, DAG, Subtarget);
4529 }
4530 if (ExtractedContainerVT.bitsLE(VT))
4531 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Passthru,
4532 ExtractedVal, DAG.getVectorIdxConstant(0, DL));
4533 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ExtractedVal,
4534 DAG.getVectorIdxConstant(0, DL));
4535 }
4536 }
4537
4538
// Floating-point scalars map directly to vfmv.s.f.
4539 if (VT.isFloatingPoint())
4540 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT,
4541 DAG.getUNDEF(VT), Scalar, VL);
4542
4543 // Avoid the tricky legalization cases by falling back to using the
4544 // splat code which already handles it gracefully.
4545 if (!Scalar.getValueType().bitsLE(XLenVT))
4546 return lowerScalarSplat(DAG.getUNDEF(VT), Scalar,
4547 DAG.getConstant(1, DL, XLenVT),
4548 VT, DL, DAG, Subtarget);
4549
4550 // If the operand is a constant, sign extend to increase our chances
4551 // of being able to use a .vi instruction. ANY_EXTEND would become a
4552 // a zero extend and the simm5 check in isel would fail.
4553 // FIXME: Should we ignore the upper bits in isel instead?
4554 unsigned ExtOpc =
4556 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
4557 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT,
4558 DAG.getUNDEF(VT), Scalar, VL);
4559}
4560
4561// Is this a shuffle extracts either the even or odd elements of a vector?
4562// That is, specifically, either (a) or (b) below.
4563// t34: v8i8 = extract_subvector t11, Constant:i64<0>
4564// t33: v8i8 = extract_subvector t11, Constant:i64<8>
4565// a) t35: v8i8 = vector_shuffle<0,2,4,6,8,10,12,14> t34, t33
4566// b) t35: v8i8 = vector_shuffle<1,3,5,7,9,11,13,15> t34, t33
4567// Returns {Src Vector, Even Elements} on success
4568static bool isDeinterleaveShuffle(MVT VT, MVT ContainerVT, SDValue V1,
4569 SDValue V2, ArrayRef<int> Mask,
4570 const RISCVSubtarget &Subtarget) {
4571 // Need to be able to widen the vector.
4572 if (VT.getScalarSizeInBits() >= Subtarget.getELen())
4573 return false;
4574
4575 // Both input must be extracts.
4576 if (V1.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
4577 V2.getOpcode() != ISD::EXTRACT_SUBVECTOR)
4578 return false;
4579
4580 // Extracting from the same source.
4581 SDValue Src = V1.getOperand(0);
4582 if (Src != V2.getOperand(0))
4583 return false;
4584
4585 // Src needs to have twice the number of elements.
4586 if (Src.getValueType().getVectorNumElements() != (Mask.size() * 2))
4587 return false;
4588
4589 // The extracts must extract the two halves of the source.
4590 if (V1.getConstantOperandVal(1) != 0 ||
4591 V2.getConstantOperandVal(1) != Mask.size())
4592 return false;
4593
4594 // First index must be the first even or odd element from V1.
4595 if (Mask[0] != 0 && Mask[0] != 1)
4596 return false;
4597
4598 // The others must increase by 2 each time.
4599 // TODO: Support undef elements?
4600 for (unsigned i = 1; i != Mask.size(); ++i)
4601 if (Mask[i] != Mask[i - 1] + 2)
4602 return false;
4603
4604 return true;
4605}
4606
4607/// Is this shuffle interleaving contiguous elements from one vector into the
4608/// even elements and contiguous elements from another vector into the odd
4609/// elements. \p EvenSrc will contain the element that should be in the first
4610/// even element. \p OddSrc will contain the element that should be in the first
4611/// odd element. These can be the first element in a source or the element half
4612/// way through the source.
4613static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, int &EvenSrc,
4614 int &OddSrc, const RISCVSubtarget &Subtarget) {
4615 // We need to be able to widen elements to the next larger integer type.
4616 if (VT.getScalarSizeInBits() >= Subtarget.getELen())
4617 return false;
4618
4619 int Size = Mask.size();
4620 int NumElts = VT.getVectorNumElements();
4621 assert(Size == (int)NumElts && "Unexpected mask size");
4622
4623 SmallVector<unsigned, 2> StartIndexes;
4624 if (!ShuffleVectorInst::isInterleaveMask(Mask, 2, Size * 2, StartIndexes))
4625 return false;
4626
4627 EvenSrc = StartIndexes[0];
4628 OddSrc = StartIndexes[1];
4629
4630 // One source should be low half of first vector.
4631 if (EvenSrc != 0 && OddSrc != 0)
4632 return false;
4633
4634 // Subvectors will be subtracted from either at the start of the two input
4635 // vectors, or at the start and middle of the first vector if it's an unary
4636 // interleave.
4637 // In both cases, HalfNumElts will be extracted.
4638 // We need to ensure that the extract indices are 0 or HalfNumElts otherwise
4639 // we'll create an illegal extract_subvector.
4640 // FIXME: We could support other values using a slidedown first.
4641 int HalfNumElts = NumElts / 2;
4642 return ((EvenSrc % HalfNumElts) == 0) && ((OddSrc % HalfNumElts) == 0);
4643}
4644
4645/// Match shuffles that concatenate two vectors, rotate the concatenation,
4646/// and then extract the original number of elements from the rotated result.
4647/// This is equivalent to vector.splice or X86's PALIGNR instruction. The
4648/// returned rotation amount is for a rotate right, where elements move from
4649/// higher elements to lower elements. \p LoSrc indicates the first source
4650/// vector of the rotate or -1 for undef. \p HiSrc indicates the second vector
4651/// of the rotate or -1 for undef. At least one of \p LoSrc and \p HiSrc will be
4652/// 0 or 1 if a rotation is found.
4653///
4654/// NOTE: We talk about rotate to the right which matches how bit shift and
4655/// rotate instructions are described where LSBs are on the right, but LLVM IR
4656/// and the table below write vectors with the lowest elements on the left.
4657static int isElementRotate(int &LoSrc, int &HiSrc, ArrayRef<int> Mask) {
4658 int Size = Mask.size();
4659
4660 // We need to detect various ways of spelling a rotation:
4661 // [11, 12, 13, 14, 15, 0, 1, 2]
4662 // [-1, 12, 13, 14, -1, -1, 1, -1]
4663 // [-1, -1, -1, -1, -1, -1, 1, 2]
4664 // [ 3, 4, 5, 6, 7, 8, 9, 10]
4665 // [-1, 4, 5, 6, -1, -1, 9, -1]
4666 // [-1, 4, 5, 6, -1, -1, -1, -1]
4667 int Rotation = 0;
4668 LoSrc = -1;
4669 HiSrc = -1;
4670 for (int i = 0; i != Size; ++i) {
4671 int M = Mask[i];
4672 if (M < 0)
4673 continue;
4674
4675 // Determine where a rotate vector would have started.
4676 int StartIdx = i - (M % Size);
4677 // The identity rotation isn't interesting, stop.
4678 if (StartIdx == 0)
4679 return -1;
4680
4681 // If we found the tail of a vector the rotation must be the missing
4682 // front. If we found the head of a vector, it must be how much of the
4683 // head.
4684 int CandidateRotation = StartIdx < 0 ? -StartIdx : Size - StartIdx;
4685
4686 if (Rotation == 0)
4687 Rotation = CandidateRotation;
4688 else if (Rotation != CandidateRotation)
4689 // The rotations don't match, so we can't match this mask.
4690 return -1;
4691
4692 // Compute which value this mask is pointing at.
4693 int MaskSrc = M < Size ? 0 : 1;
4694
4695 // Compute which of the two target values this index should be assigned to.
4696 // This reflects whether the high elements are remaining or the low elemnts
4697 // are remaining.
4698 int &TargetSrc = StartIdx < 0 ? HiSrc : LoSrc;
4699
4700 // Either set up this value if we've not encountered it before, or check
4701 // that it remains consistent.
4702 if (TargetSrc < 0)
4703 TargetSrc = MaskSrc;
4704 else if (TargetSrc != MaskSrc)
4705 // This may be a rotation, but it pulls from the inputs in some
4706 // unsupported interleaving.
4707 return -1;
4708 }
4709
4710 // Check that we successfully analyzed the mask, and normalize the results.
4711 assert(Rotation != 0 && "Failed to locate a viable rotation!");
4712 assert((LoSrc >= 0 || HiSrc >= 0) &&
4713 "Failed to find a rotated input vector!");
4714
4715 return Rotation;
4716}
4717
4718// Lower a deinterleave shuffle to vnsrl.
4719// [a, p, b, q, c, r, d, s] -> [a, b, c, d] (EvenElts == true)
4720// -> [p, q, r, s] (EvenElts == false)
4721// VT is the type of the vector to return, <[vscale x ]n x ty>
4722// Src is the vector to deinterleave of type <[vscale x ]n*2 x ty>
4724 bool EvenElts,
4725 const RISCVSubtarget &Subtarget,
4726 SelectionDAG &DAG) {
4727 // The result is a vector of type <m x n x ty>
4728 MVT ContainerVT = VT;
4729 // Convert fixed vectors to scalable if needed
4730 if (ContainerVT.isFixedLengthVector()) {
4731 assert(Src.getSimpleValueType().isFixedLengthVector());
4732 ContainerVT = getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget);
4733
4734 // The source is a vector of type <m x n*2 x ty>
4735 MVT SrcContainerVT =
4737 ContainerVT.getVectorElementCount() * 2);
4738 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
4739 }
4740
4741 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4742
4743 // Bitcast the source vector from <m x n*2 x ty> -> <m x n x ty*2>
4744 // This also converts FP to int.
4745 unsigned EltBits = ContainerVT.getScalarSizeInBits();
4746 MVT WideSrcContainerVT = MVT::getVectorVT(
4747 MVT::getIntegerVT(EltBits * 2), ContainerVT.getVectorElementCount());
4748 Src = DAG.getBitcast(WideSrcContainerVT, Src);
4749
4750 // The integer version of the container type.
4751 MVT IntContainerVT = ContainerVT.changeVectorElementTypeToInteger();
4752
4753 // If we want even elements, then the shift amount is 0. Otherwise, shift by
4754 // the original element size.
4755 unsigned Shift = EvenElts ? 0 : EltBits;
// Splat the shift amount, then narrowing-shift-right each doubled-width
// element: this drops either the high (even) or low (odd) half of each pair.
4756 SDValue SplatShift = DAG.getNode(
4757 RISCVISD::VMV_V_X_VL, DL, IntContainerVT, DAG.getUNDEF(ContainerVT),
4758 DAG.getConstant(Shift, DL, Subtarget.getXLenVT()), VL);
4759 SDValue Res =
4760 DAG.getNode(RISCVISD::VNSRL_VL, DL, IntContainerVT, Src, SplatShift,
4761 DAG.getUNDEF(IntContainerVT), TrueMask, VL);
4762 // Cast back to FP if needed.
4763 Res = DAG.getBitcast(ContainerVT, Res);
4764
// Convert back to a fixed-length vector when the caller asked for one.
4765 if (VT.isFixedLengthVector())
4766 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
4767 return Res;
4768}
4769
4770// Lower the following shuffle to vslidedown.
4771// a)
4772// t49: v8i8 = extract_subvector t13, Constant:i64<0>
4773// t109: v8i8 = extract_subvector t13, Constant:i64<8>
4774// t108: v8i8 = vector_shuffle<1,2,3,4,5,6,7,8> t49, t106
4775// b)
4776// t69: v16i16 = extract_subvector t68, Constant:i64<0>
4777// t23: v8i16 = extract_subvector t69, Constant:i64<0>
4778// t29: v4i16 = extract_subvector t23, Constant:i64<4>
4779// t26: v8i16 = extract_subvector t69, Constant:i64<8>
4780// t30: v4i16 = extract_subvector t26, Constant:i64<0>
4781// t54: v4i16 = vector_shuffle<1,2,3,4> t29, t30
4783 SDValue V1, SDValue V2,
4784 ArrayRef<int> Mask,
4785 const RISCVSubtarget &Subtarget,
4786 SelectionDAG &DAG) {
4787 auto findNonEXTRACT_SUBVECTORParent =
4788 [](SDValue Parent) -> std::pair<SDValue, uint64_t> {
4789 uint64_t Offset = 0;
4790 while (Parent.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
4791 // EXTRACT_SUBVECTOR can be used to extract a fixed-width vector from
4792 // a scalable vector. But we don't want to match the case.
4793 Parent.getOperand(0).getSimpleValueType().isFixedLengthVector()) {
4794 Offset += Parent.getConstantOperandVal(1);
4795 Parent = Parent.getOperand(0);
4796 }
4797 return std::make_pair(Parent, Offset);
4798 };
4799
4800 auto [V1Src, V1IndexOffset] = findNonEXTRACT_SUBVECTORParent(V1);
4801 auto [V2Src, V2IndexOffset] = findNonEXTRACT_SUBVECTORParent(V2);
4802
4803 // Extracting from the same source.
4804 SDValue Src = V1Src;
4805 if (Src != V2Src)
4806 return SDValue();
4807
4808 // Rebuild mask because Src may be from multiple EXTRACT_SUBVECTORs.
4809 SmallVector<int, 16> NewMask(Mask);
4810 for (size_t i = 0; i != NewMask.size(); ++i) {
4811 if (NewMask[i] == -1)
4812 continue;
4813
4814 if (static_cast<size_t>(NewMask[i]) < NewMask.size()) {
4815 NewMask[i] = NewMask[i] + V1IndexOffset;
4816 } else {
4817 // Minus NewMask.size() is needed. Otherwise, the b case would be
4818 // <5,6,7,12> instead of <5,6,7,8>.
4819 NewMask[i] = NewMask[i] - NewMask.size() + V2IndexOffset;
4820 }
4821 }
4822
4823 // First index must be known and non-zero. It will be used as the slidedown
4824 // amount.
4825 if (NewMask[0] <= 0)
4826 return SDValue();
4827
4828 // NewMask is also continuous.
4829 for (unsigned i = 1; i != NewMask.size(); ++i)
4830 if (NewMask[i - 1] + 1 != NewMask[i])
4831 return SDValue();
4832
4833 MVT XLenVT = Subtarget.getXLenVT();
4834 MVT SrcVT = Src.getSimpleValueType();
4835 MVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
4836 auto [TrueMask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
4837 SDValue Slidedown =
4838 getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4839 convertToScalableVector(ContainerVT, Src, DAG, Subtarget),
4840 DAG.getConstant(NewMask[0], DL, XLenVT), TrueMask, VL);
4841 return DAG.getNode(
4843 convertFromScalableVector(SrcVT, Slidedown, DAG, Subtarget),
4844 DAG.getConstant(0, DL, XLenVT));
4845}
4846
4847// Because vslideup leaves the destination elements at the start intact, we can
4848// use it to perform shuffles that insert subvectors:
4849//
4850// vector_shuffle v8:v8i8, v9:v8i8, <0, 1, 2, 3, 8, 9, 10, 11>
4851// ->
4852// vsetvli zero, 8, e8, mf2, ta, ma
4853// vslideup.vi v8, v9, 4
4854//
4855// vector_shuffle v8:v8i8, v9:v8i8 <0, 1, 8, 9, 10, 5, 6, 7>
4856// ->
4857// vsetvli zero, 5, e8, mf2, tu, ma
4858// vslideup.v1 v8, v9, 2
4860 SDValue V1, SDValue V2,
4861 ArrayRef<int> Mask,
4862 const RISCVSubtarget &Subtarget,
4863 SelectionDAG &DAG) {
4864 unsigned NumElts = VT.getVectorNumElements();
4865 int NumSubElts, Index;
4866 if (!ShuffleVectorInst::isInsertSubvectorMask(Mask, NumElts, NumSubElts,
4867 Index))
4868 return SDValue();
4869
4870 bool OpsSwapped = Mask[Index] < (int)NumElts;
4871 SDValue InPlace = OpsSwapped ? V2 : V1;
4872 SDValue ToInsert = OpsSwapped ? V1 : V2;
4873
4874 MVT XLenVT = Subtarget.getXLenVT();
4875 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4876 auto TrueMask = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).first;
4877 // We slide up by the index that the subvector is being inserted at, and set
4878 // VL to the index + the number of elements being inserted.
4880 // If the we're adding a suffix to the in place vector, i.e. inserting right
4881 // up to the very end of it, then we don't actually care about the tail.
4882 if (NumSubElts + Index >= (int)NumElts)
4883 Policy |= RISCVII::TAIL_AGNOSTIC;
4884
4885 InPlace = convertToScalableVector(ContainerVT, InPlace, DAG, Subtarget);
4886 ToInsert = convertToScalableVector(ContainerVT, ToInsert, DAG, Subtarget);
4887 SDValue VL = DAG.getConstant(NumSubElts + Index, DL, XLenVT);
4888
4889 SDValue Res;
4890 // If we're inserting into the lowest elements, use a tail undisturbed
4891 // vmv.v.v.
4892 if (Index == 0)
4893 Res = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, InPlace, ToInsert,
4894 VL);
4895 else
4896 Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, InPlace, ToInsert,
4897 DAG.getConstant(Index, DL, XLenVT), TrueMask, VL, Policy);
4898 return convertFromScalableVector(VT, Res, DAG, Subtarget);
4899}
4900
4901/// Match v(f)slide1up/down idioms. These operations involve sliding
4902/// N-1 elements to make room for an inserted scalar at one end.
4904 SDValue V1, SDValue V2,
4905 ArrayRef<int> Mask,
4906 const RISCVSubtarget &Subtarget,
4907 SelectionDAG &DAG) {
4908 bool OpsSwapped = false;
4909 if (!isa<BuildVectorSDNode>(V1)) {
4910 if (!isa<BuildVectorSDNode>(V2))
4911 return SDValue();
4912 std::swap(V1, V2);
4913 OpsSwapped = true;
4914 }
4915 SDValue Splat = cast<BuildVectorSDNode>(V1)->getSplatValue();
4916 if (!Splat)
4917 return SDValue();
4918
4919 // Return true if the mask could describe a slide of Mask.size() - 1
4920 // elements from concat_vector(V1, V2)[Base:] to [Offset:].
4921 auto isSlideMask = [](ArrayRef<int> Mask, unsigned Base, int Offset) {
4922 const unsigned S = (Offset > 0) ? 0 : -Offset;
4923 const unsigned E = Mask.size() - ((Offset > 0) ? Offset : 0);
4924 for (unsigned i = S; i != E; ++i)
4925 if (Mask[i] >= 0 && (unsigned)Mask[i] != Base + i + Offset)
4926 return false;
4927 return true;
4928 };
4929
4930 const unsigned NumElts = VT.getVectorNumElements();
4931 bool IsVSlidedown = isSlideMask(Mask, OpsSwapped ? 0 : NumElts, 1);
4932 if (!IsVSlidedown && !isSlideMask(Mask, OpsSwapped ? 0 : NumElts, -1))
4933 return SDValue();
4934
4935 const int InsertIdx = Mask[IsVSlidedown ? (NumElts - 1) : 0];
4936 // Inserted lane must come from splat, undef scalar is legal but not profitable.
4937 if (InsertIdx < 0 || InsertIdx / NumElts != (unsigned)OpsSwapped)
4938 return SDValue();
4939
4940 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4941 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4942 auto OpCode = IsVSlidedown ?
4945 if (!VT.isFloatingPoint())
4946 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Splat);
4947 auto Vec = DAG.getNode(OpCode, DL, ContainerVT,
4948 DAG.getUNDEF(ContainerVT),
4949 convertToScalableVector(ContainerVT, V2, DAG, Subtarget),
4950 Splat, TrueMask, VL);
4951 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4952}
4953
4954// Given two input vectors of <[vscale x ]n x ty>, use vwaddu.vv and vwmaccu.vx
4955// to create an interleaved vector of <[vscale x] n*2 x ty>.
4956// This requires that the size of ty is less than the subtarget's maximum ELEN.
4958 const SDLoc &DL, SelectionDAG &DAG,
4959 const RISCVSubtarget &Subtarget) {
4960 MVT VecVT = EvenV.getSimpleValueType();
4961 MVT VecContainerVT = VecVT; // <vscale x n x ty>
4962 // Convert fixed vectors to scalable if needed
4963 if (VecContainerVT.isFixedLengthVector()) {
4964 VecContainerVT = getContainerForFixedLengthVector(DAG, VecVT, Subtarget);
4965 EvenV = convertToScalableVector(VecContainerVT, EvenV, DAG, Subtarget);
4966 OddV = convertToScalableVector(VecContainerVT, OddV, DAG, Subtarget);
4967 }
4968
4969 assert(VecVT.getScalarSizeInBits() < Subtarget.getELen());
4970
4971 // We're working with a vector of the same size as the resulting
4972 // interleaved vector, but with half the number of elements and
4973 // twice the SEW (Hence the restriction on not using the maximum
4974 // ELEN)
4975 MVT WideVT =
4977 VecVT.getVectorElementCount());
4978 MVT WideContainerVT = WideVT; // <vscale x n x ty*2>
4979 if (WideContainerVT.isFixedLengthVector())
4980 WideContainerVT = getContainerForFixedLengthVector(DAG, WideVT, Subtarget);
4981
4982 // Bitcast the input vectors to integers in case they are FP
4983 VecContainerVT = VecContainerVT.changeTypeToInteger();
4984 EvenV = DAG.getBitcast(VecContainerVT, EvenV);
4985 OddV = DAG.getBitcast(VecContainerVT, OddV);
4986
4987 auto [Mask, VL] = getDefaultVLOps(VecVT, VecContainerVT, DL, DAG, Subtarget);
4988 SDValue Passthru = DAG.getUNDEF(WideContainerVT);
4989
4990 SDValue Interleaved;
4991 if (OddV.isUndef()) {
4992 // If OddV is undef, this is a zero extend.
4993 // FIXME: Not only does this optimize the code, it fixes some correctness
4994 // issues because MIR does not have freeze.
4995 Interleaved =
4996 DAG.getNode(RISCVISD::VZEXT_VL, DL, WideContainerVT, EvenV, Mask, VL);
4997 } else if (Subtarget.hasStdExtZvbb()) {
4998 // Interleaved = (OddV << VecVT.getScalarSizeInBits()) + EvenV.
4999 SDValue OffsetVec =
5000 DAG.getConstant(VecVT.getScalarSizeInBits(), DL, VecContainerVT);
5001 Interleaved = DAG.getNode(RISCVISD::VWSLL_VL, DL, WideContainerVT, OddV,
5002 OffsetVec, Passthru, Mask, VL);
5003 if (!EvenV.isUndef())
5004 Interleaved = DAG.getNode(RISCVISD::VWADDU_W_VL, DL, WideContainerVT,
5005 Interleaved, EvenV, Passthru, Mask, VL);
5006 } else if (EvenV.isUndef()) {
5007 Interleaved =
5008 DAG.getNode(RISCVISD::VZEXT_VL, DL, WideContainerVT, OddV, Mask, VL);
5009
5010 SDValue OffsetVec =
5011 DAG.getConstant(VecVT.getScalarSizeInBits(), DL, WideContainerVT);
5012 Interleaved = DAG.getNode(RISCVISD::SHL_VL, DL, WideContainerVT,
5013 Interleaved, OffsetVec, Passthru, Mask, VL);
5014 } else {
5015 // FIXME: We should freeze the odd vector here. We already handled the case
5016 // of provably undef/poison above.
5017
5018 // Widen EvenV and OddV with 0s and add one copy of OddV to EvenV with
5019 // vwaddu.vv
5020 Interleaved = DAG.getNode(RISCVISD::VWADDU_VL, DL, WideContainerVT, EvenV,
5021 OddV, Passthru, Mask, VL);
5022
5023 // Then get OddV * by 2^(VecVT.getScalarSizeInBits() - 1)
5024 SDValue AllOnesVec = DAG.getSplatVector(
5025 VecContainerVT, DL, DAG.getAllOnesConstant(DL, Subtarget.getXLenVT()));
5026 SDValue OddsMul = DAG.getNode(RISCVISD::VWMULU_VL, DL, WideContainerVT,
5027 OddV, AllOnesVec, Passthru, Mask, VL);
5028
5029 // Add the two together so we get
5030 // (OddV * 0xff...ff) + (OddV + EvenV)
5031 // = (OddV * 0x100...00) + EvenV
5032 // = (OddV << VecVT.getScalarSizeInBits()) + EvenV
5033 // Note the ADD_VL and VLMULU_VL should get selected as vwmaccu.vx
5034 Interleaved = DAG.getNode(RISCVISD::ADD_VL, DL, WideContainerVT,
5035 Interleaved, OddsMul, Passthru, Mask, VL);
5036 }
5037
5038 // Bitcast from <vscale x n * ty*2> to <vscale x 2*n x ty>
5039 MVT ResultContainerVT = MVT::getVectorVT(
5040 VecVT.getVectorElementType(), // Make sure to use original type
5041 VecContainerVT.getVectorElementCount().multiplyCoefficientBy(2));
5042 Interleaved = DAG.getBitcast(ResultContainerVT, Interleaved);
5043
5044 // Convert back to a fixed vector if needed
5045 MVT ResultVT =
5048 if (ResultVT.isFixedLengthVector())
5049 Interleaved =
5050 convertFromScalableVector(ResultVT, Interleaved, DAG, Subtarget);
5051
5052 return Interleaved;
5053}
5054
5055// If we have a vector of bits that we want to reverse, we can use a vbrev on a
5056// larger element type, e.g. v32i1 can be reversed with a v1i32 bitreverse.
5058 SelectionDAG &DAG,
5059 const RISCVSubtarget &Subtarget) {
5060 SDLoc DL(SVN);
5061 MVT VT = SVN->getSimpleValueType(0);
5062 SDValue V = SVN->getOperand(0);
5063 unsigned NumElts = VT.getVectorNumElements();
5064
5065 assert(VT.getVectorElementType() == MVT::i1);
5066
5068 SVN->getMask().size()) ||
5069 !SVN->getOperand(1).isUndef())
5070 return SDValue();
5071
5072 unsigned ViaEltSize = std::max((uint64_t)8, PowerOf2Ceil(NumElts));
5073 EVT ViaVT = EVT::getVectorVT(
5074 *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), ViaEltSize), 1);
5075 EVT ViaBitVT =
5076 EVT::getVectorVT(*DAG.getContext(), MVT::i1, ViaVT.getScalarSizeInBits());
5077
5078 // If we don't have zvbb or the larger element type > ELEN, the operation will
5079 // be illegal.
5081 ViaVT) ||
5082 !Subtarget.getTargetLowering()->isTypeLegal(ViaBitVT))
5083 return SDValue();
5084
5085 // If the bit vector doesn't fit exactly into the larger element type, we need
5086 // to insert it into the larger vector and then shift up the reversed bits
5087 // afterwards to get rid of the gap introduced.
5088 if (ViaEltSize > NumElts)
5089 V = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ViaBitVT, DAG.getUNDEF(ViaBitVT),
5090 V, DAG.getVectorIdxConstant(0, DL));
5091
5092 SDValue Res =
5093 DAG.getNode(ISD::BITREVERSE, DL, ViaVT, DAG.getBitcast(ViaVT, V));
5094
5095 // Shift up the reversed bits if the vector didn't exactly fit into the larger
5096 // element type.
5097 if (ViaEltSize > NumElts)
5098 Res = DAG.getNode(ISD::SRL, DL, ViaVT, Res,
5099 DAG.getConstant(ViaEltSize - NumElts, DL, ViaVT));
5100
5101 Res = DAG.getBitcast(ViaBitVT, Res);
5102
5103 if (ViaEltSize > NumElts)
5104 Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
5105 DAG.getVectorIdxConstant(0, DL));
5106 return Res;
5107}
5108
5110 SelectionDAG &DAG,
5111 const RISCVSubtarget &Subtarget,
5112 MVT &RotateVT, unsigned &RotateAmt) {
5113 SDLoc DL(SVN);
5114
5115 EVT VT = SVN->getValueType(0);
5116 unsigned NumElts = VT.getVectorNumElements();
5117 unsigned EltSizeInBits = VT.getScalarSizeInBits();
5118 unsigned NumSubElts;
5119 if (!ShuffleVectorInst::isBitRotateMask(SVN->getMask(), EltSizeInBits, 2,
5120 NumElts, NumSubElts, RotateAmt))
5121 return false;
5122 RotateVT = MVT::getVectorVT(MVT::getIntegerVT(EltSizeInBits * NumSubElts),
5123 NumElts / NumSubElts);
5124
5125 // We might have a RotateVT that isn't legal, e.g. v4i64 on zve32x.
5126 return Subtarget.getTargetLowering()->isTypeLegal(RotateVT);
5127}
5128
5129// Given a shuffle mask like <3, 0, 1, 2, 7, 4, 5, 6> for v8i8, we can
5130// reinterpret it as a v2i32 and rotate it right by 8 instead. We can lower this
5131// as a vror.vi if we have Zvkb, or otherwise as a vsll, vsrl and vor.
5133 SelectionDAG &DAG,
5134 const RISCVSubtarget &Subtarget) {
5135 SDLoc DL(SVN);
5136
5137 EVT VT = SVN->getValueType(0);
5138 unsigned RotateAmt;
5139 MVT RotateVT;
5140 if (!isLegalBitRotate(SVN, DAG, Subtarget, RotateVT, RotateAmt))
5141 return SDValue();
5142
5143 SDValue Op = DAG.getBitcast(RotateVT, SVN->getOperand(0));
5144
5145 SDValue Rotate;
5146 // A rotate of an i16 by 8 bits either direction is equivalent to a byteswap,
5147 // so canonicalize to vrev8.
5148 if (RotateVT.getScalarType() == MVT::i16 && RotateAmt == 8)
5149 Rotate = DAG.getNode(ISD::BSWAP, DL, RotateVT, Op);
5150 else
5151 Rotate = DAG.getNode(ISD::ROTL, DL, RotateVT, Op,
5152 DAG.getConstant(RotateAmt, DL, RotateVT));
5153
5154 return DAG.getBitcast(VT, Rotate);
5155}
5156
5157// If compiling with an exactly known VLEN, see if we can split a
5158// shuffle on m2 or larger into a small number of m1 sized shuffles
5159// which write each destination registers exactly once.
5161 SelectionDAG &DAG,
5162 const RISCVSubtarget &Subtarget) {
5163 SDLoc DL(SVN);
5164 MVT VT = SVN->getSimpleValueType(0);
5165 SDValue V1 = SVN->getOperand(0);
5166 SDValue V2 = SVN->getOperand(1);
5167 ArrayRef<int> Mask = SVN->getMask();
5168 unsigned NumElts = VT.getVectorNumElements();
5169
5170 // If we don't know exact data layout, not much we can do. If this
5171 // is already m1 or smaller, no point in splitting further.
5172 const auto VLen = Subtarget.getRealVLen();
5173 if (!VLen || VT.getSizeInBits().getFixedValue() <= *VLen)
5174 return SDValue();
5175
5176 // Avoid picking up bitrotate patterns which we have a linear-in-lmul
5177 // expansion for.
5178 unsigned RotateAmt;
5179 MVT RotateVT;
5180 if (isLegalBitRotate(SVN, DAG, Subtarget, RotateVT, RotateAmt))
5181 return SDValue();
5182
5183 MVT ElemVT = VT.getVectorElementType();
5184 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
5185 unsigned VRegsPerSrc = NumElts / ElemsPerVReg;
5186
5188 OutMasks(VRegsPerSrc, {-1, {}});
5189
5190 // Check if our mask can be done as a 1-to-1 mapping from source
5191 // to destination registers in the group without needing to
5192 // write each destination more than once.
5193 for (unsigned DstIdx = 0; DstIdx < Mask.size(); DstIdx++) {
5194 int DstVecIdx = DstIdx / ElemsPerVReg;
5195 int DstSubIdx = DstIdx % ElemsPerVReg;
5196 int SrcIdx = Mask[DstIdx];
5197 if (SrcIdx < 0 || (unsigned)SrcIdx >= 2 * NumElts)
5198 continue;
5199 int SrcVecIdx = SrcIdx / ElemsPerVReg;
5200 int SrcSubIdx = SrcIdx % ElemsPerVReg;
5201 if (OutMasks[DstVecIdx].first == -1)
5202 OutMasks[DstVecIdx].first = SrcVecIdx;
5203 if (OutMasks[DstVecIdx].first != SrcVecIdx)
5204 // Note: This case could easily be handled by keeping track of a chain
5205 // of source values and generating two element shuffles below. This is
5206 // less an implementation question, and more a profitability one.
5207 return SDValue();
5208
5209 OutMasks[DstVecIdx].second.resize(ElemsPerVReg, -1);
5210 OutMasks[DstVecIdx].second[DstSubIdx] = SrcSubIdx;
5211 }
5212
5213 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5214 MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
5215 MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
5216 assert(M1VT == getLMUL1VT(M1VT));
5217 unsigned NumOpElts = M1VT.getVectorMinNumElements();
5218 SDValue Vec = DAG.getUNDEF(ContainerVT);
5219 // The following semantically builds up a fixed length concat_vector
5220 // of the component shuffle_vectors. We eagerly lower to scalable here
5221 // to avoid DAG combining it back to a large shuffle_vector again.
5222 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
5223 V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget);
5224 for (unsigned DstVecIdx = 0 ; DstVecIdx < OutMasks.size(); DstVecIdx++) {
5225 auto &[SrcVecIdx, SrcSubMask] = OutMasks[DstVecIdx];
5226 if (SrcVecIdx == -1)
5227 continue;
5228 unsigned ExtractIdx = (SrcVecIdx % VRegsPerSrc) * NumOpElts;
5229 SDValue SrcVec = (unsigned)SrcVecIdx >= VRegsPerSrc ? V2 : V1;
5230 SDValue SubVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, SrcVec,
5231 DAG.getVectorIdxConstant(ExtractIdx, DL));
5232 SubVec = convertFromScalableVector(OneRegVT, SubVec, DAG, Subtarget);
5233 SubVec = DAG.getVectorShuffle(OneRegVT, DL, SubVec, SubVec, SrcSubMask);
5234 SubVec = convertToScalableVector(M1VT, SubVec, DAG, Subtarget);
5235 unsigned InsertIdx = DstVecIdx * NumOpElts;
5236 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, Vec, SubVec,
5237 DAG.getVectorIdxConstant(InsertIdx, DL));
5238 }
5239 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
5240}
5241
5243 const RISCVSubtarget &Subtarget) {
5244 SDValue V1 = Op.getOperand(0);
5245 SDValue V2 = Op.getOperand(1);
5246 SDLoc DL(Op);
5247 MVT XLenVT = Subtarget.getXLenVT();
5248 MVT VT = Op.getSimpleValueType();
5249 unsigned NumElts = VT.getVectorNumElements();
5251
5252 if (VT.getVectorElementType() == MVT::i1) {
5253 // Lower to a vror.vi of a larger element type if possible before we promote
5254 // i1s to i8s.
5255 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
5256 return V;
5257 if (SDValue V = lowerBitreverseShuffle(SVN, DAG, Subtarget))
5258 return V;
5259
5260 // Promote i1 shuffle to i8 shuffle.
5261 MVT WidenVT = MVT::getVectorVT(MVT::i8, VT.getVectorElementCount());
5262 V1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V1);
5263 V2 = V2.isUndef() ? DAG.getUNDEF(WidenVT)
5264 : DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V2);
5265 SDValue Shuffled = DAG.getVectorShuffle(WidenVT, DL, V1, V2, SVN->getMask());
5266 return DAG.getSetCC(DL, VT, Shuffled, DAG.getConstant(0, DL, WidenVT),
5267 ISD::SETNE);
5268 }
5269
5270 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5271
5272 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5273
5274 if (SVN->isSplat()) {
5275 const int Lane = SVN->getSplatIndex();
5276 if (Lane >= 0) {
5277 MVT SVT = VT.getVectorElementType();
5278
5279 // Turn splatted vector load into a strided load with an X0 stride.
5280 SDValue V = V1;
5281 // Peek through CONCAT_VECTORS as VectorCombine can concat a vector
5282 // with undef.
5283 // FIXME: Peek through INSERT_SUBVECTOR, EXTRACT_SUBVECTOR, bitcasts?
5284 int Offset = Lane;
5285 if (V.getOpcode() == ISD::CONCAT_VECTORS) {
5286 int OpElements =
5287 V.getOperand(0).getSimpleValueType().getVectorNumElements();
5288 V = V.getOperand(Offset / OpElements);
5289 Offset %= OpElements;
5290 }
5291
5292 // We need to ensure the load isn't atomic or volatile.
5293 if (ISD::isNormalLoad(V.getNode()) && cast<LoadSDNode>(V)->isSimple()) {
5294 auto *Ld = cast<LoadSDNode>(V);
5295 Offset *= SVT.getStoreSize();
5296 SDValue NewAddr = DAG.getMemBasePlusOffset(
5297 Ld->getBasePtr(), TypeSize::getFixed(Offset), DL);
5298
5299 // If this is SEW=64 on RV32, use a strided load with a stride of x0.
5300 if (SVT.isInteger() && SVT.bitsGT(XLenVT)) {
5301 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
5302 SDValue IntID =
5303 DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, XLenVT);
5304 SDValue Ops[] = {Ld->getChain(),
5305 IntID,
5306 DAG.getUNDEF(ContainerVT),
5307 NewAddr,
5308 DAG.getRegister(RISCV::X0, XLenVT),
5309 VL};
5310 SDValue NewLoad = DAG.getMemIntrinsicNode(
5311 ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, SVT,
5313 Ld->getMemOperand(), Offset, SVT.getStoreSize()));
5314 DAG.makeEquivalentMemoryOrdering(Ld, NewLoad);
5315 return convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
5316 }
5317
5318 MVT SplatVT = ContainerVT;
5319
5320 // If we don't have Zfh, we need to use an integer scalar load.
5321 if (SVT == MVT::f16 && !Subtarget.hasStdExtZfh()) {
5322 SVT = MVT::i16;
5323 SplatVT = ContainerVT.changeVectorElementType(SVT);
5324 }
5325
5326 // Otherwise use a scalar load and splat. This will give the best
5327 // opportunity to fold a splat into the operation. ISel can turn it into
5328 // the x0 strided load if we aren't able to fold away the select.
5329 if (SVT.isFloatingPoint())
5330 V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
5331 Ld->getPointerInfo().getWithOffset(Offset),
5332 Ld->getOriginalAlign(),
5333 Ld->getMemOperand()->getFlags());
5334 else
5335 V = DAG.getExtLoad(ISD::EXTLOAD, DL, XLenVT, Ld->getChain(), NewAddr,
5336 Ld->getPointerInfo().getWithOffset(Offset), SVT,
5337 Ld->getOriginalAlign(),
5338 Ld->getMemOperand()->getFlags());
5340
5341 unsigned Opc = SplatVT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
5343 SDValue Splat =
5344 DAG.getNode(Opc, DL, SplatVT, DAG.getUNDEF(ContainerVT), V, VL);
5345 Splat = DAG.getBitcast(ContainerVT, Splat);
5346 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
5347 }
5348
5349 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
5350 assert(Lane < (int)NumElts && "Unexpected lane!");
5351 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT,
5352 V1, DAG.getConstant(Lane, DL, XLenVT),
5353 DAG.getUNDEF(ContainerVT), TrueMask, VL);
5354 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
5355 }
5356 }
5357
5358 // For exact VLEN m2 or greater, try to split to m1 operations if we
5359 // can split cleanly.
5360 if (SDValue V = lowerShuffleViaVRegSplitting(SVN, DAG, Subtarget))
5361 return V;
5362
5363 ArrayRef<int> Mask = SVN->getMask();
5364
5365 if (SDValue V =
5366 lowerVECTOR_SHUFFLEAsVSlide1(DL, VT, V1, V2, Mask, Subtarget, DAG))
5367 return V;
5368
5369 if (SDValue V =
5370 lowerVECTOR_SHUFFLEAsVSlidedown(DL, VT, V1, V2, Mask, Subtarget, DAG))
5371 return V;
5372
5373 // A bitrotate will be one instruction on Zvkb, so try to lower to it first if
5374 // available.
5375 if (Subtarget.hasStdExtZvkb())
5376 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
5377 return V;
5378
5379 // Lower rotations to a SLIDEDOWN and a SLIDEUP. One of the source vectors may
5380 // be undef which can be handled with a single SLIDEDOWN/UP.
5381 int LoSrc, HiSrc;
5382 int Rotation = isElementRotate(LoSrc, HiSrc, Mask);
5383 if (Rotation > 0) {
5384 SDValue LoV, HiV;
5385 if (LoSrc >= 0) {
5386 LoV = LoSrc == 0 ? V1 : V2;
5387 LoV = convertToScalableVector(ContainerVT, LoV, DAG, Subtarget);
5388 }
5389 if (HiSrc >= 0) {
5390 HiV = HiSrc == 0 ? V1 : V2;
5391 HiV = convertToScalableVector(ContainerVT, HiV, DAG, Subtarget);
5392 }
5393
5394 // We found a rotation. We need to slide HiV down by Rotation. Then we need
5395 // to slide LoV up by (NumElts - Rotation).
5396 unsigned InvRotate = NumElts - Rotation;
5397
5398 SDValue Res = DAG.getUNDEF(ContainerVT);
5399 if (HiV) {
5400 // Even though we could use a smaller VL, don't to avoid a vsetivli
5401 // toggle.
5402 Res = getVSlidedown(DAG, Subtarget, DL, ContainerVT, Res, HiV,
5403 DAG.getConstant(Rotation, DL, XLenVT), TrueMask, VL);
5404 }
5405 if (LoV)
5406 Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, Res, LoV,
5407 DAG.getConstant(InvRotate, DL, XLenVT), TrueMask, VL,
5409
5410 return convertFromScalableVector(VT, Res, DAG, Subtarget);
5411 }
5412
5413 if (ShuffleVectorInst::isReverseMask(Mask, NumElts) && V2.isUndef())
5414 return DAG.getNode(ISD::VECTOR_REVERSE, DL, VT, V1);
5415
5416 // If this is a deinterleave and we can widen the vector, then we can use
5417 // vnsrl to deinterleave.
5418 if (isDeinterleaveShuffle(VT, ContainerVT, V1, V2, Mask, Subtarget)) {
5419 return getDeinterleaveViaVNSRL(DL, VT, V1.getOperand(0), Mask[0] == 0,
5420 Subtarget, DAG);
5421 }
5422
5423 if (SDValue V =
5424 lowerVECTOR_SHUFFLEAsVSlideup(DL, VT, V1, V2, Mask, Subtarget, DAG))
5425 return V;
5426
5427 // Detect an interleave shuffle and lower to
5428 // (vmaccu.vx (vwaddu.vx lohalf(V1), lohalf(V2)), lohalf(V2), (2^eltbits - 1))
5429 int EvenSrc, OddSrc;
5430 if (isInterleaveShuffle(Mask, VT, EvenSrc, OddSrc, Subtarget)) {
5431 // Extract the halves of the vectors.
5432 MVT HalfVT = VT.getHalfNumVectorElementsVT();
5433
5434 int Size = Mask.size();
5435 SDValue EvenV, OddV;
5436 assert(EvenSrc >= 0 && "Undef source?");
5437 EvenV = (EvenSrc / Size) == 0 ? V1 : V2;
5438 EvenV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, EvenV,
5439 DAG.getVectorIdxConstant(EvenSrc % Size, DL));
5440
5441 assert(OddSrc >= 0 && "Undef source?");
5442 OddV = (OddSrc / Size) == 0 ? V1 : V2;
5443 OddV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, OddV,
5444 DAG.getVectorIdxConstant(OddSrc % Size, DL));
5445
5446 return getWideningInterleave(EvenV, OddV, DL, DAG, Subtarget);
5447 }
5448
5449
5450 // Handle any remaining single source shuffles
5451 assert(!V1.isUndef() && "Unexpected shuffle canonicalization");
5452 if (V2.isUndef()) {
5453 // We might be able to express the shuffle as a bitrotate. But even if we
5454 // don't have Zvkb and have to expand, the expanded sequence of approx. 2
5455 // shifts and a vor will have a higher throughput than a vrgather.
5456 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
5457 return V;
5458
5459 if (VT.getScalarSizeInBits() == 8 &&
5460 any_of(Mask, [&](const auto &Idx) { return Idx > 255; })) {
5461 // On such a vector we're unable to use i8 as the index type.
5462 // FIXME: We could promote the index to i16 and use vrgatherei16, but that
5463 // may involve vector splitting if we're already at LMUL=8, or our
5464 // user-supplied maximum fixed-length LMUL.
5465 return SDValue();
5466 }
5467
5468 // Base case for the two operand recursion below - handle the worst case
5469 // single source shuffle.
5470 unsigned GatherVVOpc = RISCVISD::VRGATHER_VV_VL;
5471 MVT IndexVT = VT.changeTypeToInteger();
5472 // Since we can't introduce illegal index types at this stage, use i16 and
5473 // vrgatherei16 if the corresponding index type for plain vrgather is greater
5474 // than XLenVT.
5475 if (IndexVT.getScalarType().bitsGT(XLenVT)) {
5476 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
5477 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
5478 }
5479
5480 // If the mask allows, we can do all the index computation in 16 bits. This
5481 // requires less work and less register pressure at high LMUL, and creates
5482 // smaller constants which may be cheaper to materialize.
5483 if (IndexVT.getScalarType().bitsGT(MVT::i16) && isUInt<16>(NumElts - 1) &&
5484 (IndexVT.getSizeInBits() / Subtarget.getRealMinVLen()) > 1) {
5485 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
5486 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
5487 }
5488
5489 MVT IndexContainerVT =
5490 ContainerVT.changeVectorElementType(IndexVT.getScalarType());
5491
5492 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
5493 SmallVector<SDValue> GatherIndicesLHS;
5494 for (int MaskIndex : Mask) {
5495 bool IsLHSIndex = MaskIndex < (int)NumElts && MaskIndex >= 0;
5496 GatherIndicesLHS.push_back(IsLHSIndex
5497 ? DAG.getConstant(MaskIndex, DL, XLenVT)
5498 : DAG.getUNDEF(XLenVT));
5499 }
5500 SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS);
5501 LHSIndices = convertToScalableVector(IndexContainerVT, LHSIndices, DAG,
5502 Subtarget);
5503 SDValue Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices,
5504 DAG.getUNDEF(ContainerVT), TrueMask, VL);
5505 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
5506 }
5507
5508 // As a backup, shuffles can be lowered via a vrgather instruction, possibly
5509 // merged with a second vrgather.
5510 SmallVector<int> ShuffleMaskLHS, ShuffleMaskRHS;
5511
5512 // Now construct the mask that will be used by the blended vrgather operation.
5513 // Construct the appropriate indices into each vector.
5514 for (int MaskIndex : Mask) {
5515 bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts;
5516 ShuffleMaskLHS.push_back(IsLHSOrUndefIndex && MaskIndex >= 0
5517 ? MaskIndex : -1);
5518 ShuffleMaskRHS.push_back(IsLHSOrUndefIndex ? -1 : (MaskIndex - NumElts));
5519 }
5520
5521 // Try to pick a profitable operand order.
5522 bool SwapOps = DAG.isSplatValue(V2) && !DAG.isSplatValue(V1);
5523 SwapOps = SwapOps ^ ShuffleVectorInst::isIdentityMask(ShuffleMaskRHS, NumElts);
5524
5525 // Recursively invoke lowering for each operand if we had two
5526 // independent single source shuffles, and then combine the result via a
5527 // vselect. Note that the vselect will likely be folded back into the
5528 // second permute (vrgather, or other) by the post-isel combine.
5529 V1 = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT), ShuffleMaskLHS);
5530 V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), ShuffleMaskRHS);
5531
5532 SmallVector<SDValue> MaskVals;
5533 for (int MaskIndex : Mask) {
5534 bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ !SwapOps;
5535 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
5536 }
5537
5538 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
5539 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
5540 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
5541
5542 if (SwapOps)
5543 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V1, V2);
5544 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V2, V1);
5545}
5546
5548 // Support splats for any type. These should type legalize well.
5549 if (ShuffleVectorSDNode::isSplatMask(M.data(), VT))
5550 return true;
5551
5552 // Only support legal VTs for other shuffles for now.
5553 if (!isTypeLegal(VT))
5554 return false;
5555
5556 MVT SVT = VT.getSimpleVT();
5557
5558 // Not for i1 vectors.
5559 if (SVT.getScalarType() == MVT::i1)
5560 return false;
5561
5562 int Dummy1, Dummy2;
5563 return (isElementRotate(Dummy1, Dummy2, M) > 0) ||
5564 isInterleaveShuffle(M, SVT, Dummy1, Dummy2, Subtarget);
5565}
5566
5567// Lower CTLZ_ZERO_UNDEF or CTTZ_ZERO_UNDEF by converting to FP and extracting
5568// the exponent.
5569SDValue
5570RISCVTargetLowering::lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op,
5571 SelectionDAG &DAG) const {
5572 MVT VT = Op.getSimpleValueType();
5573 unsigned EltSize = VT.getScalarSizeInBits();
5574 SDValue Src = Op.getOperand(0);
5575 SDLoc DL(Op);
5576 MVT ContainerVT = VT;
5577
5578 SDValue Mask, VL;
5579 if (Op->isVPOpcode()) {
5580 Mask = Op.getOperand(1);
5581 if (VT.isFixedLengthVector())
5582 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
5583 Subtarget);
5584 VL = Op.getOperand(2);
5585 }
5586
5587 // We choose FP type that can represent the value if possible. Otherwise, we
5588 // use rounding to zero conversion for correct exponent of the result.
5589 // TODO: Use f16 for i8 when possible?
5590 MVT FloatEltVT = (EltSize >= 32) ? MVT::f64 : MVT::f32;
5591 if (!isTypeLegal(MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount())))
5592 FloatEltVT = MVT::f32;
5593 MVT FloatVT = MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());
5594
5595 // Legal types should have been checked in the RISCVTargetLowering
5596 // constructor.
5597 // TODO: Splitting may make sense in some cases.
5598 assert(DAG.getTargetLoweringInfo().isTypeLegal(FloatVT) &&
5599 "Expected legal float type!");
5600
5601 // For CTTZ_ZERO_UNDEF, we need to extract the lowest set bit using X & -X.
5602 // The trailing zero count is equal to log2 of this single bit value.
5603 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
5604 SDValue Neg = DAG.getNegative(Src, DL, VT);
5605 Src = DAG.getNode(ISD::AND, DL, VT, Src, Neg);
5606 } else if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF) {
5607 SDValue Neg = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(0, DL, VT),
5608 Src, Mask, VL);
5609 Src = DAG.getNode(ISD::VP_AND, DL, VT, Src, Neg, Mask, VL);
5610 }
5611
5612 // We have a legal FP type, convert to it.
5613 SDValue FloatVal;
5614 if (FloatVT.bitsGT(VT)) {
5615 if (Op->isVPOpcode())
5616 FloatVal = DAG.getNode(ISD::VP_UINT_TO_FP, DL, FloatVT, Src, Mask, VL);
5617 else
5618 FloatVal = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVT, Src);
5619 } else {
5620 // Use RTZ to avoid rounding influencing exponent of FloatVal.
5621 if (VT.isFixedLengthVector()) {
5622 ContainerVT = getContainerForFixedLengthVector(VT);
5623 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
5624 }
5625 if (!Op->isVPOpcode())
5626 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5627 SDValue RTZRM =
5629 MVT ContainerFloatVT =
5630 MVT::getVectorVT(FloatEltVT, ContainerVT.getVectorElementCount());
5631 FloatVal = DAG.getNode(RISCVISD::VFCVT_RM_F_XU_VL, DL, ContainerFloatVT,
5632 Src, Mask, RTZRM, VL);
5633 if (VT.isFixedLengthVector())
5634 FloatVal = convertFromScalableVector(FloatVT, FloatVal, DAG, Subtarget);
5635 }
5636 // Bitcast to integer and shift the exponent to the LSB.
5637 EVT IntVT = FloatVT.changeVectorElementTypeToInteger();
5638 SDValue Bitcast = DAG.getBitcast(IntVT, FloatVal);
5639 unsigned ShiftAmt = FloatEltVT == MVT::f64 ? 52 : 23;
5640
5641 SDValue Exp;
5642 // Restore back to original type. Truncation after SRL is to generate vnsrl.
5643 if (Op->isVPOpcode()) {
5644 Exp = DAG.getNode(ISD::VP_SRL, DL, IntVT, Bitcast,
5645 DAG.getConstant(ShiftAmt, DL, IntVT), Mask, VL);
5646 Exp = DAG.getVPZExtOrTrunc(DL, VT, Exp, Mask, VL);
5647 } else {
5648 Exp = DAG.getNode(ISD::SRL, DL, IntVT, Bitcast,
5649 DAG.getConstant(ShiftAmt, DL, IntVT));
5650 if (IntVT.bitsLT(VT))
5651 Exp = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Exp);
5652 else if (IntVT.bitsGT(VT))
5653 Exp = DAG.getNode(ISD::TRUNCATE, DL, VT, Exp);
5654 }
5655
5656 // The exponent contains log2 of the value in biased form.
5657 unsigned ExponentBias = FloatEltVT == MVT::f64 ? 1023 : 127;
5658 // For trailing zeros, we just need to subtract the bias.
5659 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF)
5660 return DAG.getNode(ISD::SUB, DL, VT, Exp,
5661 DAG.getConstant(ExponentBias, DL, VT));
5662 if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF)
5663 return DAG.getNode(ISD::VP_SUB, DL, VT, Exp,
5664 DAG.getConstant(ExponentBias, DL, VT), Mask, VL);
5665
5666 // For leading zeros, we need to remove the bias and convert from log2 to
5667 // leading zeros. We can do this by subtracting from (Bias + (EltSize - 1)).
5668 unsigned Adjust = ExponentBias + (EltSize - 1);
5669 SDValue Res;
5670 if (Op->isVPOpcode())
5671 Res = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp,
5672 Mask, VL);
5673 else
5674 Res = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp);
5675
5676 // The above result with zero input equals to Adjust which is greater than
5677 // EltSize. Hence, we can do min(Res, EltSize) for CTLZ.
5678 if (Op.getOpcode() == ISD::CTLZ)
5679 Res = DAG.getNode(ISD::UMIN, DL, VT, Res, DAG.getConstant(EltSize, DL, VT));
5680 else if (Op.getOpcode() == ISD::VP_CTLZ)
5681 Res = DAG.getNode(ISD::VP_UMIN, DL, VT, Res,
5682 DAG.getConstant(EltSize, DL, VT), Mask, VL);
5683 return Res;
5684}
5685
5686SDValue RISCVTargetLowering::lowerVPCttzElements(SDValue Op,
5687 SelectionDAG &DAG) const {
5688 SDLoc DL(Op);
5689 MVT XLenVT = Subtarget.getXLenVT();
5690 SDValue Source = Op->getOperand(0);
5691 MVT SrcVT = Source.getSimpleValueType();
5692 SDValue Mask = Op->getOperand(1);
5693 SDValue EVL = Op->getOperand(2);
5694
5695 if (SrcVT.isFixedLengthVector()) {
5696 MVT ContainerVT = getContainerForFixedLengthVector(SrcVT);
5697 Source = convertToScalableVector(ContainerVT, Source, DAG, Subtarget);
5698 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
5699 Subtarget);
5700 SrcVT = ContainerVT;
5701 }
5702
5703 // Convert to boolean vector.
5704 if (SrcVT.getScalarType() != MVT::i1) {
5705 SDValue AllZero = DAG.getConstant(0, DL, SrcVT);
5706 SrcVT = MVT::getVectorVT(MVT::i1, SrcVT.getVectorElementCount());
5707 Source = DAG.getNode(RISCVISD::SETCC_VL, DL, SrcVT,
5708 {Source, AllZero, DAG.getCondCode(ISD::SETNE),
5709 DAG.getUNDEF(SrcVT), Mask, EVL});
5710 }
5711
5712 SDValue Res = DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Source, Mask, EVL);
5713 if (Op->getOpcode() == ISD::VP_CTTZ_ELTS_ZERO_UNDEF)
5714 // In this case, we can interpret poison as -1, so nothing to do further.
5715 return Res;
5716
5717 // Convert -1 to VL.
5718 SDValue SetCC =
5719 DAG.getSetCC(DL, XLenVT, Res, DAG.getConstant(0, DL, XLenVT), ISD::SETLT);
5720 Res = DAG.getSelect(DL, XLenVT, SetCC, EVL, Res);
5721 return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res);
5722}
5723
5724// While RVV has alignment restrictions, we should always be able to load as a
5725// legal equivalently-sized byte-typed vector instead. This method is
5726// responsible for re-expressing a ISD::LOAD via a correctly-aligned type. If
5727// the load is already correctly-aligned, it returns SDValue().
5728SDValue RISCVTargetLowering::expandUnalignedRVVLoad(SDValue Op,
5729 SelectionDAG &DAG) const {
5730 auto *Load = cast<LoadSDNode>(Op);
5731 assert(Load && Load->getMemoryVT().isVector() && "Expected vector load");
5732
5734 Load->getMemoryVT(),
5735 *Load->getMemOperand()))
5736 return SDValue();
5737
5738 SDLoc DL(Op);
5739 MVT VT = Op.getSimpleValueType();
5740 unsigned EltSizeBits = VT.getScalarSizeInBits();
5741 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
5742 "Unexpected unaligned RVV load type");
5743 MVT NewVT =
5744 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
5745 assert(NewVT.isValid() &&
5746 "Expecting equally-sized RVV vector types to be legal");
5747 SDValue L = DAG.getLoad(NewVT, DL, Load->getChain(), Load->getBasePtr(),
5748 Load->getPointerInfo(), Load->getOriginalAlign(),
5749 Load->getMemOperand()->getFlags());
5750 return DAG.getMergeValues({DAG.getBitcast(VT, L), L.getValue(1)}, DL);
5751}
5752
5753// While RVV has alignment restrictions, we should always be able to store as a
5754// legal equivalently-sized byte-typed vector instead. This method is
5755// responsible for re-expressing a ISD::STORE via a correctly-aligned type. It
5756// returns SDValue() if the store is already correctly aligned.
5757SDValue RISCVTargetLowering::expandUnalignedRVVStore(SDValue Op,
5758 SelectionDAG &DAG) const {
5759 auto *Store = cast<StoreSDNode>(Op);
5760 assert(Store && Store->getValue().getValueType().isVector() &&
5761 "Expected vector store");
5762
5764 Store->getMemoryVT(),
5765 *Store->getMemOperand()))
5766 return SDValue();
5767
5768 SDLoc DL(Op);
5769 SDValue StoredVal = Store->getValue();
5770 MVT VT = StoredVal.getSimpleValueType();
5771 unsigned EltSizeBits = VT.getScalarSizeInBits();
5772 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
5773 "Unexpected unaligned RVV store type");
5774 MVT NewVT =
5775 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
5776 assert(NewVT.isValid() &&
5777 "Expecting equally-sized RVV vector types to be legal");
5778 StoredVal = DAG.getBitcast(NewVT, StoredVal);
5779 return DAG.getStore(Store->getChain(), DL, StoredVal, Store->getBasePtr(),
5780 Store->getPointerInfo(), Store->getOriginalAlign(),
5781 Store->getMemOperand()->getFlags());
5782}
5783
5785 const RISCVSubtarget &Subtarget) {
5786 assert(Op.getValueType() == MVT::i64 && "Unexpected VT");
5787
5788 int64_t Imm = cast<ConstantSDNode>(Op)->getSExtValue();
5789
5790 // All simm32 constants should be handled by isel.
5791 // NOTE: The getMaxBuildIntsCost call below should return a value >= 2 making
5792 // this check redundant, but small immediates are common so this check
5793 // should have better compile time.
5794 if (isInt<32>(Imm))
5795 return Op;
5796
5797 // We only need to cost the immediate, if constant pool lowering is enabled.
5798 if (!Subtarget.useConstantPoolForLargeInts())
5799 return Op;
5800
5802 if (Seq.size() <= Subtarget.getMaxBuildIntsCost())
5803 return Op;
5804
5805 // Optimizations below are disabled for opt size. If we're optimizing for
5806 // size, use a constant pool.
5807 if (DAG.shouldOptForSize())
5808 return SDValue();
5809
5810 // Special case. See if we can build the constant as (ADD (SLLI X, C), X) do
5811 // that if it will avoid a constant pool.
5812 // It will require an extra temporary register though.
5813 // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
5814 // low and high 32 bits are the same and bit 31 and 63 are set.
5815 unsigned ShiftAmt, AddOpc;
5816 RISCVMatInt::InstSeq SeqLo =
5817 RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc);
5818 if (!SeqLo.empty() && (SeqLo.size() + 2) <= Subtarget.getMaxBuildIntsCost())
5819 return Op;
5820
5821 return SDValue();
5822}
5823
5824SDValue RISCVTargetLowering::lowerConstantFP(SDValue Op,
5825 SelectionDAG &DAG) const {
5826 MVT VT = Op.getSimpleValueType();
5827 const APFloat &Imm = cast<ConstantFPSDNode>(Op)->getValueAPF();
5828
5829 // Can this constant be selected by a Zfa FLI instruction?
5830 bool Negate = false;
5831 int Index = getLegalZfaFPImm(Imm, VT);
5832
5833 // If the constant is negative, try negating.
5834 if (Index < 0 && Imm.isNegative()) {
5835 Index = getLegalZfaFPImm(-Imm, VT);
5836 Negate = true;
5837 }
5838
5839 // If we couldn't find a FLI lowering, fall back to generic code.
5840 if (Index < 0)
5841 return SDValue();
5842
5843 // Emit an FLI+FNEG. We use a custom node to hide from constant folding.
5844 SDLoc DL(Op);
5845 SDValue Const =
5846 DAG.getNode(RISCVISD::FLI, DL, VT,
5847 DAG.getTargetConstant(Index, DL, Subtarget.getXLenVT()));
5848 if (!Negate)
5849 return Const;
5850
5851 return DAG.getNode(ISD::FNEG, DL, VT, Const);
5852}
5853
5855 const RISCVSubtarget &Subtarget) {
5856 SDLoc dl(Op);
5857 AtomicOrdering FenceOrdering =
5858 static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
5859 SyncScope::ID FenceSSID =
5860 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
5861
5862 if (Subtarget.hasStdExtZtso()) {
5863 // The only fence that needs an instruction is a sequentially-consistent
5864 // cross-thread fence.
5865 if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
5866 FenceSSID == SyncScope::System)
5867 return Op;
5868
5869 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
5870 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
5871 }
5872
5873 // singlethread fences only synchronize with signal handlers on the same
5874 // thread and thus only need to preserve instruction order, not actually
5875 // enforce memory ordering.
5876 if (FenceSSID == SyncScope::SingleThread)
5877 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
5878 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
5879
5880 return Op;
5881}
5882
5883SDValue RISCVTargetLowering::LowerIS_FPCLASS(SDValue Op,
5884 SelectionDAG &DAG) const {
5885 SDLoc DL(Op);
5886 MVT VT = Op.getSimpleValueType();
5887 MVT XLenVT = Subtarget.getXLenVT();
5888 unsigned Check = Op.getConstantOperandVal(1);
5889 unsigned TDCMask = 0;
5890 if (Check & fcSNan)
5891 TDCMask |= RISCV::FPMASK_Signaling_NaN;
5892 if (Check & fcQNan)
5893 TDCMask |= RISCV::FPMASK_Quiet_NaN;
5894 if (Check & fcPosInf)
5896 if (Check & fcNegInf)
5898 if (Check & fcPosNormal)
5900 if (Check & fcNegNormal)
5902 if (Check & fcPosSubnormal)
5904 if (Check & fcNegSubnormal)
5906 if (Check & fcPosZero)
5907 TDCMask |= RISCV::FPMASK_Positive_Zero;
5908 if (Check & fcNegZero)
5909 TDCMask |= RISCV::FPMASK_Negative_Zero;
5910
5911 bool IsOneBitMask = isPowerOf2_32(TDCMask);
5912
5913 SDValue TDCMaskV = DAG.getConstant(TDCMask, DL, XLenVT);
5914
5915 if (VT.isVector()) {
5916 SDValue Op0 = Op.getOperand(0);
5917 MVT VT0 = Op.getOperand(0).getSimpleValueType();
5918
5919 if (VT.isScalableVector()) {
5921 auto [Mask, VL] = getDefaultScalableVLOps(VT0, DL, DAG, Subtarget);
5922 if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
5923 Mask = Op.getOperand(2);
5924 VL = Op.getOperand(3);
5925 }
5926 SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, DstVT, Op0, Mask,
5927 VL, Op->getFlags());
5928 if (IsOneBitMask)
5929 return DAG.getSetCC(DL, VT, FPCLASS,
5930 DAG.getConstant(TDCMask, DL, DstVT),
5932 SDValue AND = DAG.getNode(ISD::AND, DL, DstVT, FPCLASS,
5933 DAG.getConstant(TDCMask, DL, DstVT));
5934 return DAG.getSetCC(DL, VT, AND, DAG.getConstant(0, DL, DstVT),
5935 ISD::SETNE);
5936 }
5937
5938 MVT ContainerVT0 = getContainerForFixedLengthVector(VT0);
5939 MVT ContainerVT = getContainerForFixedLengthVector(VT);
5940 MVT ContainerDstVT = ContainerVT0.changeVectorElementTypeToInteger();
5941 auto [Mask, VL] = getDefaultVLOps(VT0, ContainerVT0, DL, DAG, Subtarget);
5942 if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
5943 Mask = Op.getOperand(2);
5944 MVT MaskContainerVT =
5945 getContainerForFixedLengthVector(Mask.getSimpleValueType());
5946 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
5947 VL = Op.getOperand(3);
5948 }
5949 Op0 = convertToScalableVector(ContainerVT0, Op0, DAG, Subtarget);
5950
5951 SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, ContainerDstVT, Op0,
5952 Mask, VL, Op->getFlags());
5953
5954 TDCMaskV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
5955 DAG.getUNDEF(ContainerDstVT), TDCMaskV, VL);
5956 if (IsOneBitMask) {
5957 SDValue VMSEQ =
5958 DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
5959 {FPCLASS, TDCMaskV, DAG.getCondCode(ISD::SETEQ),
5960 DAG.getUNDEF(ContainerVT), Mask, VL});
5961 return convertFromScalableVector(VT, VMSEQ, DAG, Subtarget);
5962 }
5963 SDValue AND = DAG.getNode(RISCVISD::AND_VL, DL, ContainerDstVT, FPCLASS,
5964 TDCMaskV, DAG.getUNDEF(ContainerDstVT), Mask, VL);
5965
5966 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
5967 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
5968 DAG.getUNDEF(ContainerDstVT), SplatZero, VL);
5969
5970 SDValue VMSNE = DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
5971 {AND, SplatZero, DAG.getCondCode(ISD::SETNE),
5972 DAG.getUNDEF(ContainerVT), Mask, VL});
5973 return convertFromScalableVector(VT, VMSNE, DAG, Subtarget);
5974 }
5975
5976 SDValue FCLASS = DAG.getNode(RISCVISD::FCLASS, DL, XLenVT, Op.getOperand(0));
5977 SDValue AND = DAG.getNode(ISD::AND, DL, XLenVT, FCLASS, TDCMaskV);
5978 SDValue Res = DAG.getSetCC(DL, XLenVT, AND, DAG.getConstant(0, DL, XLenVT),
5980 return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
5981}
5982
5983// Lower fmaximum and fminimum. Unlike our fmax and fmin instructions, these
5984// operations propagate nans.
5986 const RISCVSubtarget &Subtarget) {
5987 SDLoc DL(Op);
5988 MVT VT = Op.getSimpleValueType();
5989
5990 SDValue X = Op.getOperand(0);
5991 SDValue Y = Op.getOperand(1);
5992
5993 if (!VT.isVector()) {
5994 MVT XLenVT = Subtarget.getXLenVT();
5995
5996 // If X is a nan, replace Y with X. If Y is a nan, replace X with Y. This
5997 // ensures that when one input is a nan, the other will also be a nan
5998 // allowing the nan to propagate. If both inputs are nan, this will swap the
5999 // inputs which is harmless.
6000
6001 SDValue NewY = Y;
6002 if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(X)) {
6003 SDValue XIsNonNan = DAG.getSetCC(DL, XLenVT, X, X, ISD::SETOEQ);
6004 NewY = DAG.getSelect(DL, VT, XIsNonNan, Y, X);
6005 }
6006
6007 SDValue NewX = X;
6008 if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(Y)) {
6009 SDValue YIsNonNan = DAG.getSetCC(DL, XLenVT, Y, Y, ISD::SETOEQ);
6010 NewX = DAG.getSelect(DL, VT, YIsNonNan, X, Y);
6011 }
6012
6013 unsigned Opc =
6014 Op.getOpcode() == ISD::FMAXIMUM ? RISCVISD::FMAX : RISCVISD::FMIN;
6015 return DAG.getNode(Opc, DL, VT, NewX, NewY);
6016 }
6017
6018 // Check no NaNs before converting to fixed vector scalable.
6019 bool XIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(X);
6020 bool YIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(Y);
6021
6022 MVT ContainerVT = VT;
6023 if (VT.isFixedLengthVector()) {
6024 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
6025 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
6026 Y = convertToScalableVector(ContainerVT, Y, DAG, Subtarget);
6027 }
6028
6029 SDValue Mask, VL;
6030 if (Op->isVPOpcode()) {
6031 Mask = Op.getOperand(2);
6032 if (VT.isFixedLengthVector())
6033 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
6034 Subtarget);
6035 VL = Op.getOperand(3);
6036 } else {
6037 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
6038 }
6039
6040 SDValue NewY = Y;
6041 if (!XIsNeverNan) {
6042 SDValue XIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
6043 {X, X, DAG.getCondCode(ISD::SETOEQ),
6044 DAG.getUNDEF(ContainerVT), Mask, VL});
6045 NewY = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, XIsNonNan, Y, X,
6046 DAG.getUNDEF(ContainerVT), VL);
6047 }
6048
6049 SDValue NewX = X;
6050 if (!YIsNeverNan) {
6051 SDValue YIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
6052 {Y, Y, DAG.getCondCode(ISD::SETOEQ),
6053 DAG.getUNDEF(ContainerVT), Mask, VL});
6054 NewX = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, YIsNonNan, X, Y,
6055 DAG.getUNDEF(ContainerVT), VL);
6056 }
6057
6058 unsigned Opc =
6059 Op.getOpcode() == ISD::FMAXIMUM || Op->getOpcode() == ISD::VP_FMAXIMUM
6062 SDValue Res = DAG.getNode(Opc, DL, ContainerVT, NewX, NewY,
6063 DAG.getUNDEF(ContainerVT), Mask, VL);
6064 if (VT.isFixedLengthVector())
6065 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
6066 return Res;
6067}
6068
6070 const RISCVSubtarget &Subtarget) {
6071 bool IsFABS = Op.getOpcode() == ISD::FABS;
6072 assert((IsFABS || Op.getOpcode() == ISD::FNEG) &&
6073 "Wrong opcode for lowering FABS or FNEG.");
6074
6075 MVT XLenVT = Subtarget.getXLenVT();
6076 MVT VT = Op.getSimpleValueType();
6077 assert((VT == MVT::f16 || VT == MVT::bf16) && "Unexpected type");
6078
6079 SDLoc DL(Op);
6080 SDValue Fmv =
6081 DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op.getOperand(0));
6082
6083 APInt Mask = IsFABS ? APInt::getSignedMaxValue(16) : APInt::getSignMask(16);
6084 Mask = Mask.sext(Subtarget.getXLen());
6085
6086 unsigned LogicOpc = IsFABS ? ISD::AND : ISD::XOR;
6087 SDValue Logic =
6088 DAG.getNode(LogicOpc, DL, XLenVT, Fmv, DAG.getConstant(Mask, DL, XLenVT));
6089 return DAG.getNode(RISCVISD::FMV_H_X, DL, VT, Logic);
6090}
6091
6093 const RISCVSubtarget &Subtarget) {
6094 assert(Op.getOpcode() == ISD::FCOPYSIGN && "Unexpected opcode");
6095
6096 MVT XLenVT = Subtarget.getXLenVT();
6097 MVT VT = Op.getSimpleValueType();
6098 assert((VT == MVT::f16 || VT == MVT::bf16) && "Unexpected type");
6099
6100 SDValue Mag = Op.getOperand(0);
6101 SDValue Sign = Op.getOperand(1);
6102
6103 SDLoc DL(Op);
6104
6105 // Get sign bit into an integer value.
6106 SDValue SignAsInt;
6107 unsigned SignSize = Sign.getValueSizeInBits();
6108 if (SignSize == Subtarget.getXLen()) {
6109 SignAsInt = DAG.getNode(ISD::BITCAST, DL, XLenVT, Sign);
6110 } else if (SignSize == 16) {
6111 SignAsInt = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Sign);
6112 } else if (SignSize == 32) {
6113 SignAsInt = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, XLenVT, Sign);
6114 } else if (SignSize == 64) {
6115 assert(XLenVT == MVT::i32 && "Unexpected type");
6116 // Copy the upper word to integer.
6117 SignAsInt = DAG.getNode(RISCVISD::SplitF64, DL, {MVT::i32, MVT::i32}, Sign)
6118 .getValue(1);
6119 SignSize = 32;
6120 } else
6121 llvm_unreachable("Unexpected sign size");
6122
6123 // Get the signbit at the right position for MagAsInt.
6124 int ShiftAmount = (int)SignSize - (int)Mag.getValueSizeInBits();
6125 if (ShiftAmount > 0) {
6126 SignAsInt = DAG.getNode(ISD::SRL, DL, XLenVT, SignAsInt,
6127 DAG.getConstant(ShiftAmount, DL, XLenVT));
6128 } else if (ShiftAmount < 0) {
6129 SignAsInt = DAG.getNode(ISD::SHL, DL, XLenVT, SignAsInt,
6130 DAG.getConstant(-ShiftAmount, DL, XLenVT));
6131 }
6132
6133 // Mask the sign bit and any bits above it. The extra bits will be dropped
6134 // when we convert back to FP.
6135 SDValue SignMask = DAG.getConstant(
6136 APInt::getSignMask(16).sext(Subtarget.getXLen()), DL, XLenVT);
6137 SDValue SignBit = DAG.getNode(ISD::AND, DL, XLenVT, SignAsInt, SignMask);
6138
6139 // Transform Mag value to integer, and clear the sign bit.
6140 SDValue MagAsInt = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Mag);
6141 SDValue ClearSignMask = DAG.getConstant(
6142 APInt::getSignedMaxValue(16).sext(Subtarget.getXLen()), DL, XLenVT);
6143 SDValue ClearedSign =
6144 DAG.getNode(ISD::AND, DL, XLenVT, MagAsInt, ClearSignMask);
6145
6146 SDNodeFlags Flags;
6147 Flags.setDisjoint(true);
6148
6149 SDValue CopiedSign =
6150 DAG.getNode(ISD::OR, DL, XLenVT, ClearedSign, SignBit, Flags);
6151
6152 return DAG.getNode(RISCVISD::FMV_H_X, DL, VT, CopiedSign);
6153}
6154
6155/// Get a RISC-V target specified VL op for a given SDNode.
6156static unsigned getRISCVVLOp(SDValue Op) {
6157#define OP_CASE(NODE) \
6158 case ISD::NODE: \
6159 return RISCVISD::NODE##_VL;
6160#define VP_CASE(NODE) \
6161 case ISD::VP_##NODE: \
6162 return RISCVISD::NODE##_VL;
6163 // clang-format off
6164 switch (Op.getOpcode()) {
6165 default:
6166 llvm_unreachable("don't have RISC-V specified VL op for this SDNode");
6167 OP_CASE(ADD)
6168 OP_CASE(SUB)
6169 OP_CASE(MUL)
6170 OP_CASE(MULHS)
6171 OP_CASE(MULHU)
6172 OP_CASE(SDIV)
6173 OP_CASE(SREM)
6174 OP_CASE(UDIV)
6175 OP_CASE(UREM)
6176 OP_CASE(SHL)
6177 OP_CASE(SRA)
6178 OP_CASE(SRL)
6179 OP_CASE(ROTL)
6180 OP_CASE(ROTR)
6181 OP_CASE(BSWAP)
6182 OP_CASE(CTTZ)
6183 OP_CASE(CTLZ)
6184 OP_CASE(CTPOP)
6185 OP_CASE(BITREVERSE)
6186 OP_CASE(SADDSAT)
6187 OP_CASE(UADDSAT)
6188 OP_CASE(SSUBSAT)
6189 OP_CASE(USUBSAT)
6190 OP_CASE(AVGFLOORS)
6191 OP_CASE(AVGFLOORU)
6192 OP_CASE(AVGCEILS)
6193 OP_CASE(AVGCEILU)
6194 OP_CASE(FADD)
6195 OP_CASE(FSUB)
6196 OP_CASE(FMUL)
6197 OP_CASE(FDIV)
6198 OP_CASE(FNEG)
6199 OP_CASE(FABS)
6200 OP_CASE(FSQRT)
6201 OP_CASE(SMIN)
6202 OP_CASE(SMAX)
6203 OP_CASE(UMIN)
6204 OP_CASE(UMAX)
6205 OP_CASE(STRICT_FADD)
6206 OP_CASE(STRICT_FSUB)
6207 OP_CASE(STRICT_FMUL)
6208 OP_CASE(STRICT_FDIV)
6209 OP_CASE(STRICT_FSQRT)
6210 VP_CASE(ADD) // VP_ADD
6211 VP_CASE(SUB) // VP_SUB
6212 VP_CASE(MUL) // VP_MUL
6213 VP_CASE(SDIV) // VP_SDIV
6214 VP_CASE(SREM) // VP_SREM
6215 VP_CASE(UDIV) // VP_UDIV
6216 VP_CASE(UREM) // VP_UREM
6217 VP_CASE(SHL) // VP_SHL
6218 VP_CASE(FADD) // VP_FADD
6219 VP_CASE(FSUB) // VP_FSUB
6220 VP_CASE(FMUL) // VP_FMUL
6221 VP_CASE(FDIV) // VP_FDIV
6222 VP_CASE(FNEG) // VP_FNEG
6223 VP_CASE(FABS) // VP_FABS
6224 VP_CASE(SMIN) // VP_SMIN
6225 VP_CASE(SMAX) // VP_SMAX
6226 VP_CASE(UMIN) // VP_UMIN
6227 VP_CASE(UMAX) // VP_UMAX
6228 VP_CASE(FCOPYSIGN) // VP_FCOPYSIGN
6229 VP_CASE(SETCC) // VP_SETCC
6230 VP_CASE(SINT_TO_FP) // VP_SINT_TO_FP
6231 VP_CASE(UINT_TO_FP) // VP_UINT_TO_FP
6232 VP_CASE(BITREVERSE) // VP_BITREVERSE
6233 VP_CASE(SADDSAT) // VP_SADDSAT
6234 VP_CASE(UADDSAT) // VP_UADDSAT
6235 VP_CASE(SSUBSAT) // VP_SSUBSAT
6236 VP_CASE(USUBSAT) // VP_USUBSAT
6237 VP_CASE(BSWAP) // VP_BSWAP
6238 VP_CASE(CTLZ) // VP_CTLZ
6239 VP_CASE(CTTZ) // VP_CTTZ
6240 VP_CASE(CTPOP) // VP_CTPOP
6242 case ISD::VP_CTLZ_ZERO_UNDEF:
6243 return RISCVISD::CTLZ_VL;
6245 case ISD::VP_CTTZ_ZERO_UNDEF:
6246 return RISCVISD::CTTZ_VL;
6247 case ISD::FMA:
6248 case ISD::VP_FMA:
6249 return RISCVISD::VFMADD_VL;
6250 case ISD::STRICT_FMA:
6252 case ISD::AND:
6253 case ISD::VP_AND:
6254 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
6255 return RISCVISD::VMAND_VL;
6256 return RISCVISD::AND_VL;
6257 case ISD::OR:
6258 case ISD::VP_OR:
6259 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
6260 return RISCVISD::VMOR_VL;
6261 return RISCVISD::OR_VL;
6262 case ISD::XOR:
6263 case ISD::VP_XOR:
6264 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
6265 return RISCVISD::VMXOR_VL;
6266 return RISCVISD::XOR_VL;
6267 case ISD::VP_SELECT:
6268 case ISD::VP_MERGE:
6269 return RISCVISD::VMERGE_VL;
6270 case ISD::VP_SRA:
6271 return RISCVISD::SRA_VL;
6272 case ISD::VP_SRL:
6273 return RISCVISD::SRL_VL;
6274 case ISD::VP_SQRT:
6275 return RISCVISD::FSQRT_VL;
6276 case ISD::VP_SIGN_EXTEND:
6277 return RISCVISD::VSEXT_VL;
6278 case ISD::VP_ZERO_EXTEND:
6279 return RISCVISD::VZEXT_VL;
6280 case ISD::VP_FP_TO_SINT:
6282 case ISD::VP_FP_TO_UINT:
6284 case ISD::FMINNUM:
6285 case ISD::VP_FMINNUM:
6286 return RISCVISD::VFMIN_VL;
6287 case ISD::FMAXNUM:
6288 case ISD::VP_FMAXNUM:
6289 return RISCVISD::VFMAX_VL;
6290 case ISD::LRINT:
6291 case ISD::VP_LRINT:
6292 case ISD::LLRINT:
6293 case ISD::VP_LLRINT:
6295 }
6296 // clang-format on
6297#undef OP_CASE
6298#undef VP_CASE
6299}
6300
6301/// Return true if a RISC-V target specified op has a passthru operand.
6302static bool hasPassthruOp(unsigned Opcode) {
6303 assert(Opcode > RISCVISD::FIRST_NUMBER &&
6305 "not a RISC-V target specific op");
6307 130 &&
6310 21 &&
6311 "adding target specific op should update this function");
6312 if (Opcode >= RISCVISD::ADD_VL && Opcode <= RISCVISD::VFMAX_VL)
6313 return true;
6314 if (Opcode == RISCVISD::FCOPYSIGN_VL)
6315 return true;
6316 if (Opcode >= RISCVISD::VWMUL_VL && Opcode <= RISCVISD::VFWSUB_W_VL)
6317 return true;
6318 if (Opcode == RISCVISD::SETCC_VL)
6319 return true;
6320 if (Opcode >= RISCVISD::STRICT_FADD_VL && Opcode <= RISCVISD::STRICT_FDIV_VL)
6321 return true;
6322 if (Opcode == RISCVISD::VMERGE_VL)
6323 return true;
6324 return false;
6325}
6326
6327/// Return true if a RISC-V target specified op has a mask operand.
6328static bool hasMaskOp(unsigned Opcode) {
6329 assert(Opcode > RISCVISD::FIRST_NUMBER &&
6331 "not a RISC-V target specific op");
6333 130 &&
6336 21 &&
6337 "adding target specific op should update this function");
6338 if (Opcode >= RISCVISD::TRUNCATE_VECTOR_VL && Opcode <= RISCVISD::SETCC_VL)
6339 return true;
6340 if (Opcode >= RISCVISD::VRGATHER_VX_VL && Opcode <= RISCVISD::VFIRST_VL)
6341 return true;
6342 if (Opcode >= RISCVISD::STRICT_FADD_VL &&
6344 return true;
6345 return false;
6346}
6347
6349 const RISCVSubtarget &Subtarget) {
6350 if (Op.getValueType() == MVT::nxv32f16 &&
6351 (Subtarget.hasVInstructionsF16Minimal() &&
6352 !Subtarget.hasVInstructionsF16()))
6353 return true;
6354 if (Op.getValueType() == MVT::nxv32bf16)
6355 return true;
6356 return false;
6357}
6358
6360 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
6361 SDLoc DL(Op);
6362
6365
6366 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
6367 if (!Op.getOperand(j).getValueType().isVector()) {
6368 LoOperands[j] = Op.getOperand(j);
6369 HiOperands[j] = Op.getOperand(j);
6370 continue;
6371 }
6372 std::tie(LoOperands[j], HiOperands[j]) =
6373 DAG.SplitVector(Op.getOperand(j), DL);
6374 }
6375
6376 SDValue LoRes =
6377 DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
6378 SDValue HiRes =
6379 DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());
6380
6381 return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
6382}
6383
6385 assert(ISD::isVPOpcode(Op.getOpcode()) && "Not a VP op");
6386 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
6387 SDLoc DL(Op);
6388
6391
6392 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
6393 if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) == j) {
6394 std::tie(LoOperands[j], HiOperands[j]) =
6395 DAG.SplitEVL(Op.getOperand(j), Op.getValueType(), DL);
6396 continue;
6397 }
6398 if (!Op.getOperand(j).getValueType().isVector()) {
6399 LoOperands[j] = Op.getOperand(j);
6400 HiOperands[j] = Op.getOperand(j);
6401 continue;
6402 }
6403 std::tie(LoOperands[j], HiOperands[j]) =
6404 DAG.SplitVector(Op.getOperand(j), DL);
6405 }
6406
6407 SDValue LoRes =
6408 DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
6409 SDValue HiRes =
6410 DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());
6411
6412 return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
6413}
6414
6416 SDLoc DL(Op);
6417
6418 auto [Lo, Hi] = DAG.SplitVector(Op.getOperand(1), DL);
6419 auto [MaskLo, MaskHi] = DAG.SplitVector(Op.getOperand(2), DL);
6420 auto [EVLLo, EVLHi] =
6421 DAG.SplitEVL(Op.getOperand(3), Op.getOperand(1).getValueType(), DL);
6422
6423 SDValue ResLo =
6424 DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
6425 {Op.getOperand(0), Lo, MaskLo, EVLLo}, Op->getFlags());
6426 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
6427 {ResLo, Hi, MaskHi, EVLHi}, Op->getFlags());
6428}
6429
// Legalize a strict-FP vector node by splitting it in half. Strict FP nodes
// produce two results — the vector value (result 0) and a chain (result 1) —
// so each half gets its own VT list pairing a half-width vector type with the
// original second result type, and the two halves are serialized by feeding
// the low half's output chain into the high half.
// NOTE(review): the enclosing signature and the LoOperands/HiOperands
// declarations are not visible in this view (extraction artifact) — confirm
// against the full source before modifying.
6431
6432 assert(Op->isStrictFPOpcode());
6433
// Split only the vector result (result 0); result 1 is the chain.
6434 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op->getValueType(0));
6435
6436 SDVTList LoVTs = DAG.getVTList(LoVT, Op->getValueType(1));
6437 SDVTList HiVTs = DAG.getVTList(HiVT, Op->getValueType(1));
6438
6439 SDLoc DL(Op);
6440
6443
// Non-vector operands (e.g. the incoming chain) are duplicated into both
// halves; vector operands are split.
6444 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
6445 if (!Op.getOperand(j).getValueType().isVector()) {
6446 LoOperands[j] = Op.getOperand(j);
6447 HiOperands[j] = Op.getOperand(j);
6448 continue;
6449 }
6450 std::tie(LoOperands[j], HiOperands[j]) =
6451 DAG.SplitVector(Op.getOperand(j), DL);
6452 }
6453
6454 SDValue LoRes =
6455 DAG.getNode(Op.getOpcode(), DL, LoVTs, LoOperands, Op->getFlags());
// Thread the low half's output chain (result 1) in as the high half's
// chain operand (operand 0), serializing the two strict FP operations.
6456 HiOperands[0] = LoRes.getValue(1);
6457 SDValue HiRes =
6458 DAG.getNode(Op.getOpcode(), DL, HiVTs, HiOperands, Op->getFlags());
6459
// Concatenate the two half values and return them together with the final
// (high half's) output chain.
6460 SDValue V = DAG.getNode(ISD::CONCAT_VECTORS, DL, Op->getValueType(0),
6461 LoRes.getValue(0), HiRes.getValue(0));
6462 return DAG.getMergeValues({V, HiRes.getValue(1)}, DL);
6463}
6464
6466 SelectionDAG &DAG) const {
6467 switch (Op.getOpcode()) {
6468 default:
6469 report_fatal_error("unimplemented operand");
6470 case ISD::ATOMIC_FENCE:
6471 return LowerATOMIC_FENCE(Op, DAG, Subtarget);
6472 case ISD::GlobalAddress:
6473 return lowerGlobalAddress(Op, DAG);
6474 case ISD::BlockAddress:
6475 return lowerBlockAddress(Op, DAG);
6476 case ISD::ConstantPool:
6477 return lowerConstantPool(Op, DAG);
6478 case ISD::JumpTable:
6479 return lowerJumpTable(Op, DAG);
6481 return lowerGlobalTLSAddress(Op, DAG);
6482 case ISD::Constant:
6483 return lowerConstant(Op, DAG, Subtarget);
6484 case ISD::ConstantFP:
6485 return lowerConstantFP(Op, DAG);
6486 case ISD::SELECT:
6487 return lowerSELECT(Op, DAG);
6488 case ISD::BRCOND:
6489 return lowerBRCOND(Op, DAG);
6490 case ISD::VASTART:
6491 return lowerVASTART(Op, DAG);
6492 case ISD::FRAMEADDR:
6493 return lowerFRAMEADDR(Op, DAG);
6494 case ISD::RETURNADDR:
6495 return lowerRETURNADDR(Op, DAG);
6496 case ISD::SHL_PARTS:
6497 return lowerShiftLeftParts(Op, DAG);
6498 case ISD::SRA_PARTS:
6499 return lowerShiftRightParts(Op, DAG, true);
6500 case ISD::SRL_PARTS:
6501 return lowerShiftRightParts(Op, DAG, false);
6502 case ISD::ROTL:
6503 case ISD::ROTR:
6504 if (Op.getValueType().isFixedLengthVector()) {
6505 assert(Subtarget.hasStdExtZvkb());
6506 return lowerToScalableOp(Op, DAG);
6507 }
6508 assert(Subtarget.hasVendorXTHeadBb() &&
6509 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
6510 "Unexpected custom legalization");
6511 // XTHeadBb only supports rotate by constant.
6512 if (!isa<ConstantSDNode>(Op.getOperand(1)))
6513 return SDValue();
6514 return Op;
6515 case ISD::BITCAST: {
6516 SDLoc DL(Op);
6517 EVT VT = Op.getValueType();
6518 SDValue Op0 = Op.getOperand(0);
6519 EVT Op0VT = Op0.getValueType();
6520 MVT XLenVT = Subtarget.getXLenVT();
6521 if (Op0VT == MVT::i16 &&
6522 ((VT == MVT::f16 && Subtarget.hasStdExtZfhminOrZhinxmin()) ||
6523 (VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()))) {
6524 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
6525 return DAG.getNode(RISCVISD::FMV_H_X, DL, VT, NewOp0);
6526 }
6527 if (VT == MVT::f32 && Op0VT == MVT::i32 && Subtarget.is64Bit() &&
6528 Subtarget.hasStdExtFOrZfinx()) {
6529 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
6530 return DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
6531 }
6532 if (VT == MVT::f64 && Op0VT == MVT::i64 && XLenVT == MVT::i32) {
6533 SDValue Lo, Hi;
6534 std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32);
6535 return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
6536 }
6537
6538 // Consider other scalar<->scalar casts as legal if the types are legal.
6539 // Otherwise expand them.
6540 if (!VT.isVector() && !Op0VT.isVector()) {
6541 if (isTypeLegal(VT) && isTypeLegal(Op0VT))
6542 return Op;
6543 return SDValue();
6544 }
6545
6546 assert(!VT.isScalableVector() && !Op0VT.isScalableVector() &&
6547 "Unexpected types");
6548
6549 if (VT.isFixedLengthVector()) {
6550 // We can handle fixed length vector bitcasts with a simple replacement
6551 // in isel.
6552 if (Op0VT.isFixedLengthVector())
6553 return Op;
6554 // When bitcasting from scalar to fixed-length vector, insert the scalar
6555 // into a one-element vector of the result type, and perform a vector
6556 // bitcast.
6557 if (!Op0VT.isVector()) {
6558 EVT BVT = EVT::getVectorVT(*DAG.getContext(), Op0VT, 1);
6559 if (!isTypeLegal(BVT))
6560 return SDValue();
6561 return DAG.getBitcast(VT, DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, BVT,
6562 DAG.getUNDEF(BVT), Op0,
6563 DAG.getVectorIdxConstant(0, DL)));
6564 }
6565 return SDValue();
6566 }
6567 // Custom-legalize bitcasts from fixed-length vector types to scalar types
6568 // thus: bitcast the vector to a one-element vector type whose element type
6569 // is the same as the result type, and extract the first element.
6570 if (!VT.isVector() && Op0VT.isFixedLengthVector()) {
6571 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
6572 if (!isTypeLegal(BVT))
6573 return SDValue();
6574 SDValue BVec = DAG.getBitcast(BVT, Op0);
6575 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
6576 DAG.getVectorIdxConstant(0, DL));
6577 }
6578 return SDValue();
6579 }
6581 return LowerINTRINSIC_WO_CHAIN(Op, DAG);
6583 return LowerINTRINSIC_W_CHAIN(Op, DAG);
6585 return LowerINTRINSIC_VOID(Op, DAG);
6586 case ISD::IS_FPCLASS:
6587 return LowerIS_FPCLASS(Op, DAG);
6588 case ISD::BITREVERSE: {
6589 MVT VT = Op.getSimpleValueType();
6590 if (VT.isFixedLengthVector()) {
6591 assert(Subtarget.hasStdExtZvbb());
6592 return lowerToScalableOp(Op, DAG);
6593 }
6594 SDLoc DL(Op);
6595 assert(Subtarget.hasStdExtZbkb() && "Unexpected custom legalization");
6596 assert(Op.getOpcode() == ISD::BITREVERSE && "Unexpected opcode");
6597 // Expand bitreverse to a bswap(rev8) followed by brev8.
6598 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Op.getOperand(0));
6599 return DAG.getNode(RISCVISD::BREV8, DL, VT, BSwap);
6600 }
6601 case ISD::TRUNCATE:
6604 // Only custom-lower vector truncates
6605 if (!Op.getSimpleValueType().isVector())
6606 return Op;
6607 return lowerVectorTruncLike(Op, DAG);
6608 case ISD::ANY_EXTEND:
6609 case ISD::ZERO_EXTEND:
6610 if (Op.getOperand(0).getValueType().isVector() &&
6611 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
6612 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1);
6613 return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VZEXT_VL);
6614 case ISD::SIGN_EXTEND:
6615 if (Op.getOperand(0).getValueType().isVector() &&
6616 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
6617 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);
6618 return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VSEXT_VL);
6620 return lowerSPLAT_VECTOR_PARTS(Op, DAG);
6622 return lowerINSERT_VECTOR_ELT(Op, DAG);
6624 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
6625 case ISD::SCALAR_TO_VECTOR: {
6626 MVT VT = Op.getSimpleValueType();
6627 SDLoc DL(Op);
6628 SDValue Scalar = Op.getOperand(0);
6629 if (VT.getVectorElementType() == MVT::i1) {
6630 MVT WideVT = VT.changeVectorElementType(MVT::i8);
6631 SDValue V = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, WideVT, Scalar);
6632 return DAG.getNode(ISD::TRUNCATE, DL, VT, V);
6633 }
6634 MVT ContainerVT = VT;
6635 if (VT.isFixedLengthVector())
6636 ContainerVT = getContainerForFixedLengthVector(VT);
6637 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
6638 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Scalar);
6639 SDValue V = DAG.getNode(RISCVISD::VMV_S_X_VL, DL, ContainerVT,
6640 DAG.getUNDEF(ContainerVT), Scalar, VL);
6641 if (VT.isFixedLengthVector())
6642 V = convertFromScalableVector(VT, V, DAG, Subtarget);
6643 return V;
6644 }
6645 case ISD::VSCALE: {
6646 MVT XLenVT = Subtarget.getXLenVT();
6647 MVT VT = Op.getSimpleValueType();
6648 SDLoc DL(Op);
6649 SDValue Res = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
6650 // We define our scalable vector types for lmul=1 to use a 64 bit known
6651 // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
6652 // vscale as VLENB / 8.
6653 static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!");
6654 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
6655 report_fatal_error("Support for VLEN==32 is incomplete.");
6656 // We assume VLENB is a multiple of 8. We manually choose the best shift
6657 // here because SimplifyDemandedBits isn't always able to simplify it.
6658 uint64_t Val = Op.getConstantOperandVal(0);
6659 if (isPowerOf2_64(Val)) {
6660 uint64_t Log2 = Log2_64(Val);
6661 if (Log2 < 3)
6662 Res = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
6663 DAG.getConstant(3 - Log2, DL, VT));
6664 else if (Log2 > 3)
6665 Res = DAG.getNode(ISD::SHL, DL, XLenVT, Res,
6666 DAG.getConstant(Log2 - 3, DL, XLenVT));
6667 } else if ((Val % 8) == 0) {
6668 // If the multiplier is a multiple of 8, scale it down to avoid needing
6669 // to shift the VLENB value.
6670 Res = DAG.getNode(ISD::MUL, DL, XLenVT, Res,
6671 DAG.getConstant(Val / 8, DL, XLenVT));
6672 } else {
6673 SDValue VScale = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
6674 DAG.getConstant(3, DL, XLenVT));
6675 Res = DAG.getNode(ISD::MUL, DL, XLenVT, VScale,
6676 DAG.getConstant(Val, DL, XLenVT));
6677 }
6678 return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
6679 }
6680 case ISD::FPOWI: {
6681 // Custom promote f16 powi with illegal i32 integer type on RV64. Once
6682 // promoted this will be legalized into a libcall by LegalizeIntegerTypes.
6683 if (Op.getValueType() == MVT::f16 && Subtarget.is64Bit() &&
6684 Op.getOperand(1).getValueType() == MVT::i32) {
6685 SDLoc DL(Op);
6686 SDValue Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
6687 SDValue Powi =
6688 DAG.getNode(ISD::FPOWI, DL, MVT::f32, Op0, Op.getOperand(1));
6689 return DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, Powi,
6690 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6691 }
6692 return SDValue();
6693 }
6694 case ISD::FMAXIMUM:
6695 case ISD::FMINIMUM:
6696 if (isPromotedOpNeedingSplit(Op, Subtarget))
6697 return SplitVectorOp(Op, DAG);
6698 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
6699 case ISD::FP_EXTEND:
6700 case ISD::FP_ROUND:
6701 return lowerVectorFPExtendOrRoundLike(Op, DAG);
6704 return lowerStrictFPExtendOrRoundLike(Op, DAG);
6705 case ISD::SINT_TO_FP:
6706 case ISD::UINT_TO_FP:
6707 if (Op.getValueType().isVector() &&
6708 ((Op.getValueType().getScalarType() == MVT::f16 &&
6709 (Subtarget.hasVInstructionsF16Minimal() &&
6710 !Subtarget.hasVInstructionsF16())) ||
6711 Op.getValueType().getScalarType() == MVT::bf16)) {
6712 if (isPromotedOpNeedingSplit(Op, Subtarget))
6713 return SplitVectorOp(Op, DAG);
6714 // int -> f32
6715 SDLoc DL(Op);
6716 MVT NVT =
6717 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
6718 SDValue NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
6719 // f32 -> [b]f16
6720 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
6721 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6722 }
6723 [[fallthrough]];
6724 case ISD::FP_TO_SINT:
6725 case ISD::FP_TO_UINT:
6726 if (SDValue Op1 = Op.getOperand(0);
6727 Op1.getValueType().isVector() &&
6728 ((Op1.getValueType().getScalarType() == MVT::f16 &&
6729 (Subtarget.hasVInstructionsF16Minimal() &&
6730 !Subtarget.hasVInstructionsF16())) ||
6731 Op1.getValueType().getScalarType() == MVT::bf16)) {
6732 if (isPromotedOpNeedingSplit(Op1, Subtarget))
6733 return SplitVectorOp(Op, DAG);
6734 // [b]f16 -> f32
6735 SDLoc DL(Op);
6736 MVT NVT = MVT::getVectorVT(MVT::f32,
6737 Op1.getValueType().getVectorElementCount());
6738 SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
6739 // f32 -> int
6740 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), WidenVec);
6741 }
6742 [[fallthrough]];
6747 // RVV can only do fp<->int conversions to types half/double the size as
6748 // the source. We custom-lower any conversions that do two hops into
6749 // sequences.
6750 MVT VT = Op.getSimpleValueType();
6751 if (VT.isScalarInteger())
6752 return lowerFP_TO_INT(Op, DAG, Subtarget);
6753 bool IsStrict = Op->isStrictFPOpcode();
6754 SDValue Src = Op.getOperand(0 + IsStrict);
6755 MVT SrcVT = Src.getSimpleValueType();
6756 if (SrcVT.isScalarInteger())
6757 return lowerINT_TO_FP(Op, DAG, Subtarget);
6758 if (!VT.isVector())
6759 return Op;
6760 SDLoc DL(Op);
6761 MVT EltVT = VT.getVectorElementType();
6762 MVT SrcEltVT = SrcVT.getVectorElementType();
6763 unsigned EltSize = EltVT.getSizeInBits();
6764 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
6765 assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) &&
6766 "Unexpected vector element types");
6767
6768 bool IsInt2FP = SrcEltVT.isInteger();
6769 // Widening conversions
6770 if (EltSize > (2 * SrcEltSize)) {
6771 if (IsInt2FP) {
6772 // Do a regular integer sign/zero extension then convert to float.
6773 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize / 2),
6775 unsigned ExtOpcode = (Op.getOpcode() == ISD::UINT_TO_FP ||
6776 Op.getOpcode() == ISD::STRICT_UINT_TO_FP)
6779 SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src);
6780 if (IsStrict)
6781 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(),
6782 Op.getOperand(0), Ext);
6783 return DAG.getNode(Op.getOpcode(), DL, VT, Ext);
6784 }
6785 // FP2Int
6786 assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering");
6787 // Do one doubling fp_extend then complete the operation by converting
6788 // to int.
6789 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
6790 if (IsStrict) {
6791 auto [FExt, Chain] =
6792 DAG.getStrictFPExtendOrRound(Src, Op.getOperand(0), DL, InterimFVT);
6793 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(), Chain, FExt);
6794 }
6795 SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT);
6796 return DAG.getNode(Op.getOpcode(), DL, VT, FExt);
6797 }
6798
6799 // Narrowing conversions
6800 if (SrcEltSize > (2 * EltSize)) {
6801 if (IsInt2FP) {
6802 // One narrowing int_to_fp, then an fp_round.
6803 assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering");
6804 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
6805 if (IsStrict) {
6806 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL,
6807 DAG.getVTList(InterimFVT, MVT::Other),
6808 Op.getOperand(0), Src);
6809 SDValue Chain = Int2FP.getValue(1);
6810 return DAG.getStrictFPExtendOrRound(Int2FP, Chain, DL, VT).first;
6811 }
6812 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src);
6813 return DAG.getFPExtendOrRound(Int2FP, DL, VT);
6814 }
6815 // FP2Int
6816 // One narrowing fp_to_int, then truncate the integer. If the float isn't
6817 // representable by the integer, the result is poison.
6818 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
6820 if (IsStrict) {
6821 SDValue FP2Int =
6822 DAG.getNode(Op.getOpcode(), DL, DAG.getVTList(IVecVT, MVT::Other),
6823 Op.getOperand(0), Src);
6824 SDValue Res = DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
6825 return DAG.getMergeValues({Res, FP2Int.getValue(1)}, DL);
6826 }
6827 SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src);
6828 return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
6829 }
6830
6831 // Scalable vectors can exit here. Patterns will handle equally-sized
6832 // conversions halving/doubling ones.
6833 if (!VT.isFixedLengthVector())
6834 return Op;
6835
6836 // For fixed-length vectors we lower to a custom "VL" node.
6837 unsigned RVVOpc = 0;
6838 switch (Op.getOpcode()) {
6839 default:
6840 llvm_unreachable("Impossible opcode");
6841 case ISD::FP_TO_SINT:
6843 break;
6844 case ISD::FP_TO_UINT:
6846 break;
6847 case ISD::SINT_TO_FP:
6848 RVVOpc = RISCVISD::SINT_TO_FP_VL;
6849 break;
6850 case ISD::UINT_TO_FP:
6851 RVVOpc = RISCVISD::UINT_TO_FP_VL;
6852 break;
6855 break;
6858 break;
6861 break;
6864 break;
6865 }
6866
6867 MVT ContainerVT = getContainerForFixedLengthVector(VT);
6868 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
6869 assert(ContainerVT.getVectorElementCount() == SrcContainerVT.getVectorElementCount() &&
6870 "Expected same element count");
6871
6872 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
6873
6874 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
6875 if (IsStrict) {
6876 Src = DAG.getNode(RVVOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
6877 Op.getOperand(0), Src, Mask, VL);
6878 SDValue SubVec = convertFromScalableVector(VT, Src, DAG, Subtarget);
6879 return DAG.getMergeValues({SubVec, Src.getValue(1)}, DL);
6880 }
6881 Src = DAG.getNode(RVVOpc, DL, ContainerVT, Src, Mask, VL);
6882 return convertFromScalableVector(VT, Src, DAG, Subtarget);
6883 }
6886 return lowerFP_TO_INT_SAT(Op, DAG, Subtarget);
6887 case ISD::FP_TO_BF16: {
6888 // Custom lower to ensure the libcall return is passed in an FPR on hard
6889 // float ABIs.
6890 assert(!Subtarget.isSoftFPABI() && "Unexpected custom legalization");
6891 SDLoc DL(Op);
6892 MakeLibCallOptions CallOptions;
6893 RTLIB::Libcall LC =
6894 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16);
6895 SDValue Res =
6896 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
6897 if (Subtarget.is64Bit())
6898 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
6899 return DAG.getBitcast(MVT::i32, Res);
6900 }
6901 case ISD::BF16_TO_FP: {
6902 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalization");
6903 MVT VT = Op.getSimpleValueType();
6904 SDLoc DL(Op);
6905 Op = DAG.getNode(
6906 ISD::SHL, DL, Op.getOperand(0).getValueType(), Op.getOperand(0),
6907 DAG.getShiftAmountConstant(16, Op.getOperand(0).getValueType(), DL));
6908 SDValue Res = Subtarget.is64Bit()
6909 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Op)
6910 : DAG.getBitcast(MVT::f32, Op);
6911 // fp_extend if the target VT is bigger than f32.
6912 if (VT != MVT::f32)
6913 return DAG.getNode(ISD::FP_EXTEND, DL, VT, Res);
6914 return Res;
6915 }
6916 case ISD::STRICT_FP_TO_FP16:
6917 case ISD::FP_TO_FP16: {
6918 // Custom lower to ensure the libcall return is passed in an FPR on hard
6919 // float ABIs.
6920 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
6921 SDLoc DL(Op);
6922 MakeLibCallOptions CallOptions;
6923 bool IsStrict = Op->isStrictFPOpcode();
6924 SDValue Op0 = IsStrict ? Op.getOperand(1) : Op.getOperand(0);
6925 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
6926 RTLIB::Libcall LC = RTLIB::getFPROUND(Op0.getValueType(), MVT::f16);
6927 SDValue Res;
6928 std::tie(Res, Chain) =
6929 makeLibCall(DAG, LC, MVT::f32, Op0, CallOptions, DL, Chain);
6930 if (Subtarget.is64Bit())
6931 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
6932 SDValue Result = DAG.getBitcast(MVT::i32, IsStrict ? Res.getValue(0) : Res);
6933 if (IsStrict)
6934 return DAG.getMergeValues({Result, Chain}, DL);
6935 return Result;
6936 }
6937 case ISD::STRICT_FP16_TO_FP:
6938 case ISD::FP16_TO_FP: {
6939 // Custom lower to ensure the libcall argument is passed in an FPR on hard
6940 // float ABIs.
6941 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
6942 SDLoc DL(Op);
6943 MakeLibCallOptions CallOptions;
6944 bool IsStrict = Op->isStrictFPOpcode();
6945 SDValue Op0 = IsStrict ? Op.getOperand(1) : Op.getOperand(0);
6946 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
6947 SDValue Arg = Subtarget.is64Bit()
6948 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Op0)
6949 : DAG.getBitcast(MVT::f32, Op0);
6950 SDValue Res;
6951 std::tie(Res, Chain) = makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg,
6952 CallOptions, DL, Chain);
6953 if (IsStrict)
6954 return DAG.getMergeValues({Res, Chain}, DL);
6955 return Res;
6956 }
6957 case ISD::FTRUNC:
6958 case ISD::FCEIL:
6959 case ISD::FFLOOR:
6960 case ISD::FNEARBYINT:
6961 case ISD::FRINT:
6962 case ISD::FROUND:
6963 case ISD::FROUNDEVEN:
6964 if (isPromotedOpNeedingSplit(Op, Subtarget))
6965 return SplitVectorOp(Op, DAG);
6966 return lowerFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
6967 case ISD::LRINT:
6968 case ISD::LLRINT:
6969 if (Op.getValueType().isVector())
6970 return lowerVectorXRINT(Op, DAG, Subtarget);
6971 [[fallthrough]];
6972 case ISD::LROUND:
6973 case ISD::LLROUND: {
6974 assert(Op.getOperand(0).getValueType() == MVT::f16 &&
6975 "Unexpected custom legalisation");
6976 SDLoc DL(Op);
6977 SDValue Ext = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
6978 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), Ext);
6979 }
6980 case ISD::STRICT_LRINT:
6981 case ISD::STRICT_LLRINT:
6982 case ISD::STRICT_LROUND:
6983 case ISD::STRICT_LLROUND: {
6984 assert(Op.getOperand(1).getValueType() == MVT::f16 &&
6985 "Unexpected custom legalisation");
6986 SDLoc DL(Op);
6987 SDValue Ext = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
6988 {Op.getOperand(0), Op.getOperand(1)});
6989 return DAG.getNode(Op.getOpcode(), DL, {Op.getValueType(), MVT::Other},
6990 {Ext.getValue(1), Ext.getValue(0)});
6991 }
6992 case ISD::VECREDUCE_ADD:
6993 case ISD::VECREDUCE_UMAX:
6994 case ISD::VECREDUCE_SMAX:
6995 case ISD::VECREDUCE_UMIN:
6996 case ISD::VECREDUCE_SMIN:
6997 return lowerVECREDUCE(Op, DAG);
6998 case ISD::VECREDUCE_AND:
6999 case ISD::VECREDUCE_OR:
7000 case ISD::VECREDUCE_XOR:
7001 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
7002 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ false);
7003 return lowerVECREDUCE(Op, DAG);
7004 case ISD::VECREDUCE_FADD:
7005 case ISD::VECREDUCE_SEQ_FADD:
7006 case ISD::VECREDUCE_FMIN:
7007 case ISD::VECREDUCE_FMAX:
7008 case ISD::VECREDUCE_FMAXIMUM:
7009 case ISD::VECREDUCE_FMINIMUM:
7010 return lowerFPVECREDUCE(Op, DAG);
7011 case ISD::VP_REDUCE_ADD:
7012 case ISD::VP_REDUCE_UMAX:
7013 case ISD::VP_REDUCE_SMAX:
7014 case ISD::VP_REDUCE_UMIN:
7015 case ISD::VP_REDUCE_SMIN:
7016 case ISD::VP_REDUCE_FADD:
7017 case ISD::VP_REDUCE_SEQ_FADD:
7018 case ISD::VP_REDUCE_FMIN:
7019 case ISD::VP_REDUCE_FMAX:
7020 case ISD::VP_REDUCE_FMINIMUM:
7021 case ISD::VP_REDUCE_FMAXIMUM:
7022 if (isPromotedOpNeedingSplit(Op.getOperand(1), Subtarget))
7023 return SplitVectorReductionOp(Op, DAG);
7024 return lowerVPREDUCE(Op, DAG);
7025 case ISD::VP_REDUCE_AND:
7026 case ISD::VP_REDUCE_OR:
7027 case ISD::VP_REDUCE_XOR:
7028 if (Op.getOperand(1).getValueType().getVectorElementType() == MVT::i1)
7029 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ true);
7030 return lowerVPREDUCE(Op, DAG);
7031 case ISD::VP_CTTZ_ELTS:
7032 case ISD::VP_CTTZ_ELTS_ZERO_UNDEF:
7033 return lowerVPCttzElements(Op, DAG);
7034 case ISD::UNDEF: {
7035 MVT ContainerVT = getContainerForFixedLengthVector(Op.getSimpleValueType());
7036 return convertFromScalableVector(Op.getSimpleValueType(),
7037 DAG.getUNDEF(ContainerVT), DAG, Subtarget);
7038 }
7040 return lowerINSERT_SUBVECTOR(Op, DAG);
7042 return lowerEXTRACT_SUBVECTOR(Op, DAG);
7044 return lowerVECTOR_DEINTERLEAVE(Op, DAG);
7046 return lowerVECTOR_INTERLEAVE(Op, DAG);
7047 case ISD::STEP_VECTOR:
7048 return lowerSTEP_VECTOR(Op, DAG);
7050 return lowerVECTOR_REVERSE(Op, DAG);
7051 case ISD::VECTOR_SPLICE:
7052 return lowerVECTOR_SPLICE(Op, DAG);
7053 case ISD::BUILD_VECTOR:
7054 return lowerBUILD_VECTOR(Op, DAG, Subtarget);
7055 case ISD::SPLAT_VECTOR: {
7056 MVT VT = Op.getSimpleValueType();
7057 MVT EltVT = VT.getVectorElementType();
7058 if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) ||
7059 EltVT == MVT::bf16) {
7060 SDLoc DL(Op);
7061 SDValue Elt;
7062 if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) ||
7063 (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin()))
7064 Elt = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, Subtarget.getXLenVT(),
7065 Op.getOperand(0));
7066 else
7067 Elt = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Op.getOperand(0));
7068 MVT IVT = VT.changeVectorElementType(MVT::i16);
7069 return DAG.getNode(ISD::BITCAST, DL, VT,
7070 DAG.getNode(ISD::SPLAT_VECTOR, DL, IVT, Elt));
7071 }
7072
7073 if (EltVT == MVT::i1)
7074 return lowerVectorMaskSplat(Op, DAG);
7075 return SDValue();
7076 }
7078 return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
7079 case ISD::CONCAT_VECTORS: {
7080 // Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is
7081 // better than going through the stack, as the default expansion does.
7082 SDLoc DL(Op);
7083 MVT VT = Op.getSimpleValueType();
7084 MVT ContainerVT = VT;
7085 if (VT.isFixedLengthVector())
7086 ContainerVT = ::getContainerForFixedLengthVector(DAG, VT, Subtarget);
7087
7088 // Recursively split concat_vectors with more than 2 operands:
7089 //
7090 // concat_vector op1, op2, op3, op4
7091 // ->
7092 // concat_vector (concat_vector op1, op2), (concat_vector op3, op4)
7093 //
7094 // This reduces the length of the chain of vslideups and allows us to
7095 // perform the vslideups at a smaller LMUL, limited to MF2.
7096 if (Op.getNumOperands() > 2 &&
7097 ContainerVT.bitsGE(getLMUL1VT(ContainerVT))) {
7098 MVT HalfVT = VT.getHalfNumVectorElementsVT();
7100 size_t HalfNumOps = Op.getNumOperands() / 2;
7101 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
7102 Op->ops().take_front(HalfNumOps));
7103 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
7104 Op->ops().drop_front(HalfNumOps));
7105 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
7106 }
7107
7108 unsigned NumOpElts =
7109 Op.getOperand(0).getSimpleValueType().getVectorMinNumElements();
7110 SDValue Vec = DAG.getUNDEF(VT);
7111 for (const auto &OpIdx : enumerate(Op->ops())) {
7112 SDValue SubVec = OpIdx.value();
7113 // Don't insert undef subvectors.
7114 if (SubVec.isUndef())
7115 continue;
7116 Vec =
7117 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Vec, SubVec,
7118 DAG.getVectorIdxConstant(OpIdx.index() * NumOpElts, DL));
7119 }
7120 return Vec;
7121 }
7122 case ISD::LOAD: {
7123 auto *Load = cast<LoadSDNode>(Op);
7124 EVT VecTy = Load->getMemoryVT();
7125 // Handle normal vector tuple load.
7126 if (VecTy.isRISCVVectorTuple()) {
7127 SDLoc DL(Op);
7128 MVT XLenVT = Subtarget.getXLenVT();
7129 unsigned NF = VecTy.getRISCVVectorTupleNumFields();
7130 unsigned Sz = VecTy.getSizeInBits();
7131 unsigned NumElts = Sz / (NF * 8);
7132 int Log2LMUL = Log2_64(NumElts) - 3;
7133
7134 auto Flag = SDNodeFlags();
7135 Flag.setNoUnsignedWrap(true);
7136 SDValue Ret = DAG.getUNDEF(VecTy);
7137 SDValue BasePtr = Load->getBasePtr();
7138 SDValue VROffset = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
7139 VROffset =
7140 DAG.getNode(ISD::SHL, DL, XLenVT, VROffset,
7141 DAG.getConstant(std::max(Log2LMUL, 0), DL, XLenVT));
7142 SmallVector<SDValue, 8> OutChains;
7143
7144 // Load NF vector registers and combine them to a vector tuple.
7145 for (unsigned i = 0; i < NF; ++i) {
7146 SDValue LoadVal = DAG.getLoad(
7147 MVT::getScalableVectorVT(MVT::i8, NumElts), DL, Load->getChain(),
7148 BasePtr, MachinePointerInfo(Load->getAddressSpace()), Align(8));
7149 OutChains.push_back(LoadVal.getValue(1));
7150 Ret = DAG.getNode(RISCVISD::TUPLE_INSERT, DL, VecTy, Ret, LoadVal,
7151 DAG.getVectorIdxConstant(i, DL));
7152 BasePtr = DAG.getNode(ISD::ADD, DL, XLenVT, BasePtr, VROffset, Flag);
7153 }
7154 return DAG.getMergeValues(
7155 {Ret, DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains)}, DL);
7156 }
7157
7158 if (auto V = expandUnalignedRVVLoad(Op, DAG))
7159 return V;
7160 if (Op.getValueType().isFixedLengthVector())
7161 return lowerFixedLengthVectorLoadToRVV(Op, DAG);
7162 return Op;
7163 }
7164 case ISD::STORE: {
7165 auto *Store = cast<StoreSDNode>(Op);
7166 SDValue StoredVal = Store->getValue();
7167 EVT VecTy = StoredVal.getValueType();
7168 // Handle normal vector tuple store.
7169 if (VecTy.isRISCVVectorTuple()) {
7170 SDLoc DL(Op);
7171 MVT XLenVT = Subtarget.getXLenVT();
7172 unsigned NF = VecTy.getRISCVVectorTupleNumFields();
7173 unsigned Sz = VecTy.getSizeInBits();
7174 unsigned NumElts = Sz / (NF * 8);
7175 int Log2LMUL = Log2_64(NumElts) - 3;
7176
7177 auto Flag = SDNodeFlags();
7178 Flag.setNoUnsignedWrap(true);
7179 SDValue Ret;
7180 SDValue Chain = Store->getChain();
7181 SDValue BasePtr = Store->getBasePtr();
7182 SDValue VROffset = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
7183 VROffset =
7184 DAG.getNode(ISD::SHL, DL, XLenVT, VROffset,
7185 DAG.getConstant(std::max(Log2LMUL, 0), DL, XLenVT));
7186
7187 // Extract subregisters in a vector tuple and store them individually.
7188 for (unsigned i = 0; i < NF; ++i) {
7189 auto Extract = DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL,
7190 MVT::getScalableVectorVT(MVT::i8, NumElts),
7191 StoredVal, DAG.getVectorIdxConstant(i, DL));
7192 Ret = DAG.getStore(Chain, DL, Extract, BasePtr,
7193 MachinePointerInfo(Store->getAddressSpace()),
7194 Store->getOriginalAlign(),
7195 Store->getMemOperand()->getFlags());
7196 Chain = Ret.getValue(0);
7197 BasePtr = DAG.getNode(ISD::ADD, DL, XLenVT, BasePtr, VROffset, Flag);
7198 }
7199 return Ret;
7200 }
7201
7202 if (auto V = expandUnalignedRVVStore(Op, DAG))
7203 return V;
7204 if (Op.getOperand(1).getValueType().isFixedLengthVector())
7205 return lowerFixedLengthVectorStoreToRVV(Op, DAG);
7206 return Op;
7207 }
7208 case ISD::MLOAD:
7209 case ISD::VP_LOAD:
7210 return lowerMaskedLoad(Op, DAG);
7211 case ISD::MSTORE:
7212 case ISD::VP_STORE:
7213 return lowerMaskedStore(Op, DAG);
7214 case ISD::SELECT_CC: {
7215 // This occurs because we custom legalize SETGT and SETUGT for setcc. That
7216 // causes LegalizeDAG to think we need to custom legalize select_cc. Expand
7217 // into separate SETCC+SELECT just like LegalizeDAG.
7218 SDValue Tmp1 = Op.getOperand(0);
7219 SDValue Tmp2 = Op.getOperand(1);
7220 SDValue True = Op.getOperand(2);
7221 SDValue False = Op.getOperand(3);
7222 EVT VT = Op.getValueType();
7223 SDValue CC = Op.getOperand(4);
7224 EVT CmpVT = Tmp1.getValueType();
7225 EVT CCVT =
7226 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT);
7227 SDLoc DL(Op);
7228 SDValue Cond =
7229 DAG.getNode(ISD::SETCC, DL, CCVT, Tmp1, Tmp2, CC, Op->getFlags());
7230 return DAG.getSelect(DL, VT, Cond, True, False);
7231 }
7232 case ISD::SETCC: {
7233 MVT OpVT = Op.getOperand(0).getSimpleValueType();
7234 if (OpVT.isScalarInteger()) {
7235 MVT VT = Op.getSimpleValueType();
7236 SDValue LHS = Op.getOperand(0);
7237 SDValue RHS = Op.getOperand(1);
7238 ISD::CondCode CCVal = cast<CondCodeSDNode>(Op.getOperand(2))->get();
7239 assert((CCVal == ISD::SETGT || CCVal == ISD::SETUGT) &&
7240 "Unexpected CondCode");
7241
7242 SDLoc DL(Op);
7243
7244 // If the RHS is a constant in the range [-2049, 0) or (0, 2046], we can
7245 // convert this to the equivalent of (set(u)ge X, C+1) by using
7246 // (xori (slti(u) X, C+1), 1). This avoids materializing a small constant
7247 // in a register.
7248 if (isa<ConstantSDNode>(RHS)) {
7249 int64_t Imm = cast<ConstantSDNode>(RHS)->getSExtValue();
7250 if (Imm != 0 && isInt<12>((uint64_t)Imm + 1)) {
7251 // If this is an unsigned compare and the constant is -1, incrementing
7252 // the constant would change behavior. The result should be false.
7253 if (CCVal == ISD::SETUGT && Imm == -1)
7254 return DAG.getConstant(0, DL, VT);
7255 // Using getSetCCSwappedOperands will convert SET(U)GT->SET(U)LT.
7256 CCVal = ISD::getSetCCSwappedOperands(CCVal);
7257 SDValue SetCC = DAG.getSetCC(
7258 DL, VT, LHS, DAG.getSignedConstant(Imm + 1, DL, OpVT), CCVal);
7259 return DAG.getLogicalNOT(DL, SetCC, VT);
7260 }
7261 }
7262
7263 // Not a constant we could handle, swap the operands and condition code to
7264 // SETLT/SETULT.
7265 CCVal = ISD::getSetCCSwappedOperands(CCVal);
7266 return DAG.getSetCC(DL, VT, RHS, LHS, CCVal);
7267 }
7268
7269 if (isPromotedOpNeedingSplit(Op.getOperand(0), Subtarget))
7270 return SplitVectorOp(Op, DAG);
7271
7272 return lowerFixedLengthVectorSetccToRVV(Op, DAG);
7273 }
7274 case ISD::ADD:
7275 case ISD::SUB:
7276 case ISD::MUL:
7277 case ISD::MULHS:
7278 case ISD::MULHU:
7279 case ISD::AND:
7280 case ISD::OR:
7281 case ISD::XOR:
7282 case ISD::SDIV:
7283 case ISD::SREM:
7284 case ISD::UDIV:
7285 case ISD::UREM:
7286 case ISD::BSWAP:
7287 case ISD::CTPOP:
7288 return lowerToScalableOp(Op, DAG);
7289 case ISD::SHL:
7290 case ISD::SRA:
7291 case ISD::SRL:
7292 if (Op.getSimpleValueType().isFixedLengthVector())
7293 return lowerToScalableOp(Op, DAG);
7294 // This can be called for an i32 shift amount that needs to be promoted.
7295 assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() &&
7296 "Unexpected custom legalisation");
7297 return SDValue();
7298 case ISD::FABS:
7299 case ISD::FNEG:
7300 if (Op.getValueType() == MVT::f16 || Op.getValueType() == MVT::bf16)
7301 return lowerFABSorFNEG(Op, DAG, Subtarget);
7302 [[fallthrough]];
7303 case ISD::FADD:
7304 case ISD::FSUB:
7305 case ISD::FMUL:
7306 case ISD::FDIV:
7307 case ISD::FSQRT:
7308 case ISD::FMA:
7309 case ISD::FMINNUM:
7310 case ISD::FMAXNUM:
7311 if (isPromotedOpNeedingSplit(Op, Subtarget))
7312 return SplitVectorOp(Op, DAG);
7313 [[fallthrough]];
7314 case ISD::AVGFLOORS:
7315 case ISD::AVGFLOORU:
7316 case ISD::AVGCEILS:
7317 case ISD::AVGCEILU:
7318 case ISD::SMIN:
7319 case ISD::SMAX:
7320 case ISD::UMIN:
7321 case ISD::UMAX:
7322 case ISD::UADDSAT:
7323 case ISD::USUBSAT:
7324 case ISD::SADDSAT:
7325 case ISD::SSUBSAT:
7326 return lowerToScalableOp(Op, DAG);
7327 case ISD::ABDS:
7328 case ISD::ABDU: {
7329 SDLoc dl(Op);
7330 EVT VT = Op->getValueType(0);
7331 SDValue LHS = DAG.getFreeze(Op->getOperand(0));
7332 SDValue RHS = DAG.getFreeze(Op->getOperand(1));
7333 bool IsSigned = Op->getOpcode() == ISD::ABDS;
7334
7335 // abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
7336 // abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
7337 unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;
7338 unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN;
7339 SDValue Max = DAG.getNode(MaxOpc, dl, VT, LHS, RHS);
7340 SDValue Min = DAG.getNode(MinOpc, dl, VT, LHS, RHS);
7341 return DAG.getNode(ISD::SUB, dl, VT, Max, Min);
7342 }
7343 case ISD::ABS:
7344 case ISD::VP_ABS:
7345 return lowerABS(Op, DAG);
7346 case ISD::CTLZ:
7348 case ISD::CTTZ:
7350 if (Subtarget.hasStdExtZvbb())
7351 return lowerToScalableOp(Op, DAG);
7352 assert(Op.getOpcode() != ISD::CTTZ);
7353 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
7354 case ISD::VSELECT:
7355 return lowerFixedLengthVectorSelectToRVV(Op, DAG);
7356 case ISD::FCOPYSIGN:
7357 if (Op.getValueType() == MVT::f16 || Op.getValueType() == MVT::bf16)
7358 return lowerFCOPYSIGN(Op, DAG, Subtarget);
7359 if (isPromotedOpNeedingSplit(Op, Subtarget))
7360 return SplitVectorOp(Op, DAG);
7361 return lowerFixedLengthVectorFCOPYSIGNToRVV(Op, DAG);
7362 case ISD::STRICT_FADD:
7363 case ISD::STRICT_FSUB:
7364 case ISD::STRICT_FMUL:
7365 case ISD::STRICT_FDIV:
7366 case ISD::STRICT_FSQRT:
7367 case ISD::STRICT_FMA:
7368 if (isPromotedOpNeedingSplit(Op, Subtarget))
7369 return SplitStrictFPVectorOp(Op, DAG);
7370 return lowerToScalableOp(Op, DAG);
7371 case ISD::STRICT_FSETCC:
7373 return lowerVectorStrictFSetcc(Op, DAG);
7374 case ISD::STRICT_FCEIL:
7375 case ISD::STRICT_FRINT:
7376 case ISD::STRICT_FFLOOR:
7377 case ISD::STRICT_FTRUNC:
7379 case ISD::STRICT_FROUND:
7381 return lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
7382 case ISD::MGATHER:
7383 case ISD::VP_GATHER:
7384 return lowerMaskedGather(Op, DAG);
7385 case ISD::MSCATTER:
7386 case ISD::VP_SCATTER:
7387 return lowerMaskedScatter(Op, DAG);
7388 case ISD::GET_ROUNDING:
7389 return lowerGET_ROUNDING(Op, DAG);
7390 case ISD::SET_ROUNDING:
7391 return lowerSET_ROUNDING(Op, DAG);
7392 case ISD::EH_DWARF_CFA:
7393 return lowerEH_DWARF_CFA(Op, DAG);
7394 case ISD::VP_SELECT:
7395 case ISD::VP_MERGE:
7396 case ISD::VP_ADD:
7397 case ISD::VP_SUB:
7398 case ISD::VP_MUL:
7399 case ISD::VP_SDIV:
7400 case ISD::VP_UDIV:
7401 case ISD::VP_SREM:
7402 case ISD::VP_UREM:
7403 case ISD::VP_UADDSAT:
7404 case ISD::VP_USUBSAT:
7405 case ISD::VP_SADDSAT:
7406 case ISD::VP_SSUBSAT:
7407 case ISD::VP_LRINT:
7408 case ISD::VP_LLRINT:
7409 return lowerVPOp(Op, DAG);
7410 case ISD::VP_AND:
7411 case ISD::VP_OR:
7412 case ISD::VP_XOR:
7413 return lowerLogicVPOp(Op, DAG);
7414 case ISD::VP_FADD:
7415 case ISD::VP_FSUB:
7416 case ISD::VP_FMUL:
7417 case ISD::VP_FDIV:
7418 case ISD::VP_FNEG:
7419 case ISD::VP_FABS:
7420 case ISD::VP_SQRT:
7421 case ISD::VP_FMA:
7422 case ISD::VP_FMINNUM:
7423 case ISD::VP_FMAXNUM:
7424 case ISD::VP_FCOPYSIGN:
7425 if (isPromotedOpNeedingSplit(Op, Subtarget))
7426 return SplitVPOp(Op, DAG);
7427 [[fallthrough]];
7428 case ISD::VP_SRA:
7429 case ISD::VP_SRL:
7430 case ISD::VP_SHL:
7431 return lowerVPOp(Op, DAG);
7432 case ISD::VP_IS_FPCLASS:
7433 return LowerIS_FPCLASS(Op, DAG);
7434 case ISD::VP_SIGN_EXTEND:
7435 case ISD::VP_ZERO_EXTEND:
7436 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
7437 return lowerVPExtMaskOp(Op, DAG);
7438 return lowerVPOp(Op, DAG);
7439 case ISD::VP_TRUNCATE:
7440 return lowerVectorTruncLike(Op, DAG);
7441 case ISD::VP_FP_EXTEND:
7442 case ISD::VP_FP_ROUND:
7443 return lowerVectorFPExtendOrRoundLike(Op, DAG);
7444 case ISD::VP_SINT_TO_FP:
7445 case ISD::VP_UINT_TO_FP:
7446 if (Op.getValueType().isVector() &&
7447 ((Op.getValueType().getScalarType() == MVT::f16 &&
7448 (Subtarget.hasVInstructionsF16Minimal() &&
7449 !Subtarget.hasVInstructionsF16())) ||
7450 Op.getValueType().getScalarType() == MVT::bf16)) {
7451 if (isPromotedOpNeedingSplit(Op, Subtarget))
7452 return SplitVectorOp(Op, DAG);
7453 // int -> f32
7454 SDLoc DL(Op);
7455 MVT NVT =
7456 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
7457 auto NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
7458 // f32 -> [b]f16
7459 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
7460 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
7461 }
7462 [[fallthrough]];
7463 case ISD::VP_FP_TO_SINT:
7464 case ISD::VP_FP_TO_UINT:
7465 if (SDValue Op1 = Op.getOperand(0);
7466 Op1.getValueType().isVector() &&
7467 ((Op1.getValueType().getScalarType() == MVT::f16 &&
7468 (Subtarget.hasVInstructionsF16Minimal() &&
7469 !Subtarget.hasVInstructionsF16())) ||
7470 Op1.getValueType().getScalarType() == MVT::bf16)) {
7471 if (isPromotedOpNeedingSplit(Op1, Subtarget))
7472 return SplitVectorOp(Op, DAG);
7473 // [b]f16 -> f32
7474 SDLoc DL(Op);
7475 MVT NVT = MVT::getVectorVT(MVT::f32,
7476 Op1.getValueType().getVectorElementCount());
7477 SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
7478 // f32 -> int
7479 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
7480 {WidenVec, Op.getOperand(1), Op.getOperand(2)});
7481 }
7482 return lowerVPFPIntConvOp(Op, DAG);
7483 case ISD::VP_SETCC:
7484 if (isPromotedOpNeedingSplit(Op.getOperand(0), Subtarget))
7485 return SplitVPOp(Op, DAG);
7486 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
7487 return lowerVPSetCCMaskOp(Op, DAG);
7488 [[fallthrough]];
7489 case ISD::VP_SMIN:
7490 case ISD::VP_SMAX:
7491 case ISD::VP_UMIN:
7492 case ISD::VP_UMAX:
7493 case ISD::VP_BITREVERSE:
7494 case ISD::VP_BSWAP:
7495 return lowerVPOp(Op, DAG);
7496 case ISD::VP_CTLZ:
7497 case ISD::VP_CTLZ_ZERO_UNDEF:
7498 if (Subtarget.hasStdExtZvbb())
7499 return lowerVPOp(Op, DAG);
7500 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
7501 case ISD::VP_CTTZ:
7502 case ISD::VP_CTTZ_ZERO_UNDEF:
7503 if (Subtarget.hasStdExtZvbb())
7504 return lowerVPOp(Op, DAG);
7505 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
7506 case ISD::VP_CTPOP:
7507 return lowerVPOp(Op, DAG);
7508 case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
7509 return lowerVPStridedLoad(Op, DAG);
7510 case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
7511 return lowerVPStridedStore(Op, DAG);
7512 case ISD::VP_FCEIL:
7513 case ISD::VP_FFLOOR:
7514 case ISD::VP_FRINT:
7515 case ISD::VP_FNEARBYINT:
7516 case ISD::VP_FROUND:
7517 case ISD::VP_FROUNDEVEN:
7518 case ISD::VP_FROUNDTOZERO:
7519 if (isPromotedOpNeedingSplit(Op, Subtarget))
7520 return SplitVPOp(Op, DAG);
7521 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
7522 case ISD::VP_FMAXIMUM:
7523 case ISD::VP_FMINIMUM:
7524 if (isPromotedOpNeedingSplit(Op, Subtarget))
7525 return SplitVPOp(Op, DAG);
7526 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
7527 case ISD::EXPERIMENTAL_VP_SPLICE:
7528 return lowerVPSpliceExperimental(Op, DAG);
7529 case ISD::EXPERIMENTAL_VP_REVERSE:
7530 return lowerVPReverseExperimental(Op, DAG);
7531 case ISD::EXPERIMENTAL_VP_SPLAT:
7532 return lowerVPSplatExperimental(Op, DAG);
7533 case ISD::CLEAR_CACHE: {
7534 assert(getTargetMachine().getTargetTriple().isOSLinux() &&
7535 "llvm.clear_cache only needs custom lower on Linux targets");
7536 SDLoc DL(Op);
7537 SDValue Flags = DAG.getConstant(0, DL, Subtarget.getXLenVT());
7538 return emitFlushICache(DAG, Op.getOperand(0), Op.getOperand(1),
7539 Op.getOperand(2), Flags, DL);
7540 }
7541 }
7542}
7543
7544SDValue RISCVTargetLowering::emitFlushICache(SelectionDAG &DAG, SDValue InChain,
7545 SDValue Start, SDValue End,
7546 SDValue Flags, SDLoc DL) const {
7547 MakeLibCallOptions CallOptions;
7548 std::pair<SDValue, SDValue> CallResult =
7549 makeLibCall(DAG, RTLIB::RISCV_FLUSH_ICACHE, MVT::isVoid,
7550 {Start, End, Flags}, CallOptions, DL, InChain);
7551
7552 // This function returns void so only the out chain matters.
7553 return CallResult.second;
7554}
7555
7557 SelectionDAG &DAG, unsigned Flags) {
7558 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
7559}
7560
7562 SelectionDAG &DAG, unsigned Flags) {
7563 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
7564 Flags);
7565}
7566
7568 SelectionDAG &DAG, unsigned Flags) {
7569 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
7570 N->getOffset(), Flags);
7571}
7572
7574 SelectionDAG &DAG, unsigned Flags) {
7575 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
7576}
7577
7579 EVT Ty, SelectionDAG &DAG) {
7581 SDValue CPAddr = DAG.getTargetConstantPool(CPV, Ty, Align(8));
7582 SDValue LC = DAG.getNode(RISCVISD::LLA, DL, Ty, CPAddr);
7583 return DAG.getLoad(
7584 Ty, DL, DAG.getEntryNode(), LC,
7586}
7587
7589 EVT Ty, SelectionDAG &DAG) {
7591 RISCVConstantPoolValue::Create(*DAG.getContext(), N->getSymbol());
7592 SDValue CPAddr = DAG.getTargetConstantPool(CPV, Ty, Align(8));
7593 SDValue LC = DAG.getNode(RISCVISD::LLA, DL, Ty, CPAddr);
7594 return DAG.getLoad(
7595 Ty, DL, DAG.getEntryNode(), LC,
7597}
7598
7599template <class NodeTy>
7600SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
7601 bool IsLocal, bool IsExternWeak) const {
7602 SDLoc DL(N);
7603 EVT Ty = getPointerTy(DAG.getDataLayout());
7604
7605 // When HWASAN is used and tagging of global variables is enabled
7606 // they should be accessed via the GOT, since the tagged address of a global
7607 // is incompatible with existing code models. This also applies to non-pic
7608 // mode.
7609 if (isPositionIndependent() || Subtarget.allowTaggedGlobals()) {
7610 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
7611 if (IsLocal && !Subtarget.allowTaggedGlobals())
7612 // Use PC-relative addressing to access the symbol. This generates the
7613 // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
7614 // %pcrel_lo(auipc)).
7615 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
7616
7617 // Use PC-relative addressing to access the GOT for this symbol, then load
7618 // the address from the GOT. This generates the pattern (PseudoLGA sym),
7619 // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
7620 SDValue Load =
7621 SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
7627 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
7628 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
7629 return Load;
7630 }
7631
7632 switch (getTargetMachine().getCodeModel()) {
7633 default:
7634 report_fatal_error("Unsupported code model for lowering");
7635 case CodeModel::Small: {
7636 // Generate a sequence for accessing addresses within the first 2 GiB of
7637 // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
7638 SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
7639 SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
7640 SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
7641 return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNHi, AddrLo);
7642 }
7643 case CodeModel::Medium: {
7644 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
7645 if (IsExternWeak) {
7646 // An extern weak symbol may be undefined, i.e. have value 0, which may
7647 // not be within 2GiB of PC, so use GOT-indirect addressing to access the
7648 // symbol. This generates the pattern (PseudoLGA sym), which expands to
7649 // (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
7650 SDValue Load =
7651 SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
7657 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
7658 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
7659 return Load;
7660 }
7661
7662 // Generate a sequence for accessing addresses within any 2GiB range within
7663 // the address space. This generates the pattern (PseudoLLA sym), which
7664 // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
7665 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
7666 }
7667 case CodeModel::Large: {
7669 return getLargeGlobalAddress(G, DL, Ty, DAG);
7670
7671 // Using pc-relative mode for other node type.
7672 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
7673 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
7674 }
7675 }
7676}
7677
7678SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
7679 SelectionDAG &DAG) const {
7681 assert(N->getOffset() == 0 && "unexpected offset in global node");
7682 const GlobalValue *GV = N->getGlobal();
7683 return getAddr(N, DAG, GV->isDSOLocal(), GV->hasExternalWeakLinkage());
7684}
7685
7686SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
7687 SelectionDAG &DAG) const {
7689
7690 return getAddr(N, DAG);
7691}
7692
7693SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
7694 SelectionDAG &DAG) const {
7696
7697 return getAddr(N, DAG);
7698}
7699
7700SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op,
7701 SelectionDAG &DAG) const {
7703
7704 return getAddr(N, DAG);
7705}
7706
7707SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
7708 SelectionDAG &DAG,
7709 bool UseGOT) const {
7710 SDLoc DL(N);
7711 EVT Ty = getPointerTy(DAG.getDataLayout());
7712 const GlobalValue *GV = N->getGlobal();
7713 MVT XLenVT = Subtarget.getXLenVT();
7714
7715 if (UseGOT) {
7716 // Use PC-relative addressing to access the GOT for this TLS symbol, then
7717 // load the address from the GOT and add the thread pointer. This generates
7718 // the pattern (PseudoLA_TLS_IE sym), which expands to
7719 // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
7720 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
7721 SDValue Load =
7722 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0);
7728 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
7729 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
7730
7731 // Add the thread pointer.
7732 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
7733 return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg);
7734 }
7735
7736 // Generate a sequence for accessing the address relative to the thread
7737 // pointer, with the appropriate adjustment for the thread pointer offset.
7738 // This generates the pattern
7739 // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
7740 SDValue AddrHi =
7742 SDValue AddrAdd =
7744 SDValue AddrLo =
7746
7747 SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
7748 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
7749 SDValue MNAdd =
7750 DAG.getNode(RISCVISD::ADD_TPREL, DL, Ty, MNHi, TPReg, AddrAdd);
7751 return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNAdd, AddrLo);
7752}
7753
7754SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
7755 SelectionDAG &DAG) const {
7756 SDLoc DL(N);
7757 EVT Ty = getPointerTy(DAG.getDataLayout());
7758 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
7759 const GlobalValue *GV = N->getGlobal();
7760
7761 // Use a PC-relative addressing mode to access the global dynamic GOT address.
7762 // This generates the pattern (PseudoLA_TLS_GD sym), which expands to
7763 // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
7764 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
7765 SDValue Load =
7766 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0);
7767
7768 // Prepare argument list to generate call.
7769 ArgListTy Args;
7770 ArgListEntry Entry;
7771 Entry.Node = Load;
7772 Entry.Ty = CallTy;
7773 Args.push_back(Entry);
7774
7775 // Setup call to __tls_get_addr.
7777 CLI.setDebugLoc(DL)
7778 .setChain(DAG.getEntryNode())
7779 .setLibCallee(CallingConv::C, CallTy,
7780 DAG.getExternalSymbol("__tls_get_addr", Ty),
7781 std::move(Args));
7782
7783 return LowerCallTo(CLI).first;
7784}
7785
7786SDValue RISCVTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
7787 SelectionDAG &DAG) const {
7788 SDLoc DL(N);
7789 EVT Ty = getPointerTy(DAG.getDataLayout());
7790 const GlobalValue *GV = N->getGlobal();
7791
7792 // Use a PC-relative addressing mode to access the global dynamic GOT address.
7793 // This generates the pattern (PseudoLA_TLSDESC sym), which expands to
7794 //
7795 // auipc tX, %tlsdesc_hi(symbol) // R_RISCV_TLSDESC_HI20(symbol)
7796 // lw tY, tX, %tlsdesc_load_lo(label) // R_RISCV_TLSDESC_LOAD_LO12(label)
7797 // addi a0, tX, %tlsdesc_add_lo(label) // R_RISCV_TLSDESC_ADD_LO12(label)
7798 // jalr t0, tY // R_RISCV_TLSDESC_CALL(label)
7799 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
7800 return SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLSDESC, DL, Ty, Addr), 0);
7801}
7802
7803SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
7804 SelectionDAG &DAG) const {
7806 assert(N->getOffset() == 0 && "unexpected offset in global node");
7807
7808 if (DAG.getTarget().useEmulatedTLS())
7809 return LowerToTLSEmulatedModel(N, DAG);
7810
7811 TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());
7812
7815 report_fatal_error("In GHC calling convention TLS is not supported");
7816
7817 SDValue Addr;
7818 switch (Model) {
7820 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
7821 break;
7823 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
7824 break;
7827 Addr = DAG.getTarget().useTLSDESC() ? getTLSDescAddr(N, DAG)
7828 : getDynamicTLSAddr(N, DAG);
7829 break;
7830 }
7831
7832 return Addr;
7833}
7834
7835// Return true if Val is equal to (setcc LHS, RHS, CC).
7836// Return false if Val is the inverse of (setcc LHS, RHS, CC).
7837// Otherwise, return std::nullopt.
7838static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
7839 ISD::CondCode CC, SDValue Val) {
7840 assert(Val->getOpcode() == ISD::SETCC);
7841 SDValue LHS2 = Val.getOperand(0);
7842 SDValue RHS2 = Val.getOperand(1);
7843 ISD::CondCode CC2 = cast<CondCodeSDNode>(Val.getOperand(2))->get();
7844
7845 if (LHS == LHS2 && RHS == RHS2) {
7846 if (CC == CC2)
7847 return true;
7848 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
7849 return false;
7850 } else if (LHS == RHS2 && RHS == LHS2) {
7852 if (CC == CC2)
7853 return true;
7854 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
7855 return false;
7856 }
7857
7858 return std::nullopt;
7859}
7860
7862 const RISCVSubtarget &Subtarget) {
7863 SDValue CondV = N->getOperand(0);
7864 SDValue TrueV = N->getOperand(1);
7865 SDValue FalseV = N->getOperand(2);
7866 MVT VT = N->getSimpleValueType(0);
7867 SDLoc DL(N);
7868
7869 if (!Subtarget.hasConditionalMoveFusion()) {
7870 // (select c, -1, y) -> -c | y
7871 if (isAllOnesConstant(TrueV)) {
7872 SDValue Neg = DAG.getNegative(CondV, DL, VT);
7873 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(FalseV));
7874 }
7875 // (select c, y, -1) -> (c-1) | y
7876 if (isAllOnesConstant(FalseV)) {
7877 SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
7878 DAG.getAllOnesConstant(DL, VT));
7879 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(TrueV));
7880 }
7881
7882 // (select c, 0, y) -> (c-1) & y
7883 if (isNullConstant(TrueV)) {
7884 SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
7885 DAG.getAllOnesConstant(DL, VT));
7886 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(FalseV));
7887 }
7888 // (select c, y, 0) -> -c & y
7889 if (isNullConstant(FalseV)) {
7890 SDValue Neg = DAG.getNegative(CondV, DL, VT);
7891 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(TrueV));
7892 }
7893 }
7894
7895 // select c, ~x, x --> xor -c, x
7896 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
7897 const APInt &TrueVal = TrueV->getAsAPIntVal();
7898 const APInt &FalseVal = FalseV->getAsAPIntVal();
7899 if (~TrueVal == FalseVal) {
7900 SDValue Neg = DAG.getNegative(CondV, DL, VT);
7901 return DAG.getNode(ISD::XOR, DL, VT, Neg, FalseV);
7902 }
7903 }
7904
7905 // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
7906 // when both truev and falsev are also setcc.
7907 if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC &&
7908 FalseV.getOpcode() == ISD::SETCC) {
7909 SDValue LHS = CondV.getOperand(0);
7910 SDValue RHS = CondV.getOperand(1);
7912
7913 // (select x, x, y) -> x | y
7914 // (select !x, x, y) -> x & y
7915 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) {
7916 return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV,
7917 DAG.getFreeze(FalseV));
7918 }
7919 // (select x, y, x) -> x & y
7920 // (select !x, y, x) -> x | y
7921 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) {
7922 return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT,
7923 DAG.getFreeze(TrueV), FalseV);
7924 }
7925 }
7926
7927 return SDValue();
7928}
7929
7930// Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
7931// into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
7932// For now we only consider transformation profitable if `binOp(c0, c1)` ends up
7933// being `0` or `-1`. In such cases we can replace `select` with `and`.
7934// TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
7935// than `c0`?
7936static SDValue
7938 const RISCVSubtarget &Subtarget) {
7939 if (Subtarget.hasShortForwardBranchOpt())
7940 return SDValue();
7941
7942 unsigned SelOpNo = 0;
7943 SDValue Sel = BO->getOperand(0);
7944 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
7945 SelOpNo = 1;
7946 Sel = BO->getOperand(1);
7947 }
7948
7949 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
7950 return SDValue();
7951
7952 unsigned ConstSelOpNo = 1;
7953 unsigned OtherSelOpNo = 2;
7954 if (!dyn_cast<ConstantSDNode>(Sel->getOperand(ConstSelOpNo))) {
7955 ConstSelOpNo = 2;
7956 OtherSelOpNo = 1;
7957 }
7958 SDValue ConstSelOp = Sel->getOperand(ConstSelOpNo);
7959 ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(ConstSelOp);
7960 if (!ConstSelOpNode || ConstSelOpNode->isOpaque())
7961 return SDValue();
7962
7963 SDValue ConstBinOp = BO->getOperand(SelOpNo ^ 1);
7964 ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(ConstBinOp);
7965 if (!ConstBinOpNode || ConstBinOpNode->isOpaque())
7966 return SDValue();
7967
7968 SDLoc DL(Sel);
7969 EVT VT = BO->getValueType(0);
7970
7971 SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp};
7972 if (SelOpNo == 1)
7973 std::swap(NewConstOps[0], NewConstOps[1]);
7974
7975 SDValue NewConstOp =
7976 DAG.FoldConstantArithmetic(BO->getOpcode(), DL, VT, NewConstOps);
7977 if (!NewConstOp)
7978 return SDValue();
7979
7980 const APInt &NewConstAPInt = NewConstOp->getAsAPIntVal();
7981 if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())
7982 return SDValue();
7983
7984 SDValue OtherSelOp = Sel->getOperand(OtherSelOpNo);
7985 SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp};
7986 if (SelOpNo == 1)
7987 std::swap(NewNonConstOps[0], NewNonConstOps[1]);
7988 SDValue NewNonConstOp = DAG.getNode(BO->getOpcode(), DL, VT, NewNonConstOps);
7989
7990 SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp;
7991 SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp;
7992 return DAG.getSelect(DL, VT, Sel.getOperand(0), NewT, NewF);
7993}
7994
7995SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
7996 SDValue CondV = Op.getOperand(0);
7997 SDValue TrueV = Op.getOperand(1);
7998 SDValue FalseV = Op.getOperand(2);
7999 SDLoc DL(Op);
8000 MVT VT = Op.getSimpleValueType();
8001 MVT XLenVT = Subtarget.getXLenVT();
8002
8003 // Lower vector SELECTs to VSELECTs by splatting the condition.
8004 if (VT.isVector()) {
8005 MVT SplatCondVT = VT.changeVectorElementType(MVT::i1);
8006 SDValue CondSplat = DAG.getSplat(SplatCondVT, DL, CondV);
8007 return DAG.getNode(ISD::VSELECT, DL, VT, CondSplat, TrueV, FalseV);
8008 }
8009
8010 // When Zicond or XVentanaCondOps is present, emit CZERO_EQZ and CZERO_NEZ
8011 // nodes to implement the SELECT. Performing the lowering here allows for
8012 // greater control over when CZERO_{EQZ/NEZ} are used vs another branchless
8013 // sequence or RISCVISD::SELECT_CC node (branch-based select).
8014 if ((Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps()) &&
8015 VT.isScalarInteger()) {
8016 // (select c, t, 0) -> (czero_eqz t, c)
8017 if (isNullConstant(FalseV))
8018 return DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV);
8019 // (select c, 0, f) -> (czero_nez f, c)
8020 if (isNullConstant(TrueV))
8021 return DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV);
8022
8023 // (select c, (and f, x), f) -> (or (and f, x), (czero_nez f, c))
8024 if (TrueV.getOpcode() == ISD::AND &&
8025 (TrueV.getOperand(0) == FalseV || TrueV.getOperand(1) == FalseV))
8026 return DAG.getNode(
8027 ISD::OR, DL, VT, TrueV,
8028 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV));
8029 // (select c, t, (and t, x)) -> (or (czero_eqz t, c), (and t, x))
8030 if (FalseV.getOpcode() == ISD::AND &&
8031 (FalseV.getOperand(0) == TrueV || FalseV.getOperand(1) == TrueV))
8032 return DAG.getNode(
8033 ISD::OR, DL, VT, FalseV,
8034 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV));
8035
8036 // Try some other optimizations before falling back to generic lowering.
8037 if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
8038 return V;
8039
8040 // (select c, c1, c2) -> (add (czero_nez c2 - c1, c), c1)
8041 // (select c, c1, c2) -> (add (czero_eqz c1 - c2, c), c2)
8042 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
8043 const APInt &TrueVal = TrueV->getAsAPIntVal();
8044 const APInt &FalseVal = FalseV->getAsAPIntVal();
8045 const int TrueValCost = RISCVMatInt::getIntMatCost(
8046 TrueVal, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
8047 const int FalseValCost = RISCVMatInt::getIntMatCost(
8048 FalseVal, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
8049 bool IsCZERO_NEZ = TrueValCost <= FalseValCost;
8050 SDValue LHSVal = DAG.getConstant(
8051 IsCZERO_NEZ ? FalseVal - TrueVal : TrueVal - FalseVal, DL, VT);
8052 SDValue RHSVal =
8053 DAG.getConstant(IsCZERO_NEZ ? TrueVal : FalseVal, DL, VT);
8054 SDValue CMOV =
8056 DL, VT, LHSVal, CondV);
8057 return DAG.getNode(ISD::ADD, DL, VT, CMOV, RHSVal);
8058 }
8059
8060 // (select c, t, f) -> (or (czero_eqz t, c), (czero_nez f, c))
8061 // Unless we have the short forward branch optimization.
8062 if (!Subtarget.hasConditionalMoveFusion())
8063 return DAG.getNode(
8064 ISD::OR, DL, VT,
8065 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV),
8066 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV));
8067 }
8068
8069 if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
8070 return V;
8071
8072 if (Op.hasOneUse()) {
8073 unsigned UseOpc = Op->use_begin()->getOpcode();
8074 if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) {
8075 SDNode *BinOp = *Op->use_begin();
8076 if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(*Op->use_begin(),
8077 DAG, Subtarget)) {
8078 DAG.ReplaceAllUsesWith(BinOp, &NewSel);
8079 // Opcode check is necessary because foldBinOpIntoSelectIfProfitable
8080 // may return a constant node and cause crash in lowerSELECT.
8081 if (NewSel.getOpcode() == ISD::SELECT)
8082 return lowerSELECT(NewSel, DAG);
8083 return NewSel;
8084 }
8085 }
8086 }
8087
8088 // (select cc, 1.0, 0.0) -> (sint_to_fp (zext cc))
8089 // (select cc, 0.0, 1.0) -> (sint_to_fp (zext (xor cc, 1)))
8090 const ConstantFPSDNode *FPTV = dyn_cast<ConstantFPSDNode>(TrueV);
8091 const ConstantFPSDNode *FPFV = dyn_cast<ConstantFPSDNode>(FalseV);
8092 if (FPTV && FPFV) {
8093 if (FPTV->isExactlyValue(1.0) && FPFV->isExactlyValue(0.0))
8094 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, CondV);
8095 if (FPTV->isExactlyValue(0.0) && FPFV->isExactlyValue(1.0)) {
8096 SDValue XOR = DAG.getNode(ISD::XOR, DL, XLenVT, CondV,
8097 DAG.getConstant(1, DL, XLenVT));
8098 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, XOR);
8099 }
8100 }
8101
8102 // If the condition is not an integer SETCC which operates on XLenVT, we need
8103 // to emit a RISCVISD::SELECT_CC comparing the condition to zero. i.e.:
8104 // (select condv, truev, falsev)
8105 // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
8106 if (CondV.getOpcode() != ISD::SETCC ||
8107 CondV.getOperand(0).getSimpleValueType() != XLenVT) {
8108 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
8109 SDValue SetNE = DAG.getCondCode(ISD::SETNE);
8110
8111 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
8112
8113 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
8114 }
8115
8116 // If the CondV is the output of a SETCC node which operates on XLenVT inputs,
8117 // then merge the SETCC node into the lowered RISCVISD::SELECT_CC to take
8118 // advantage of the integer compare+branch instructions. i.e.:
8119 // (select (setcc lhs, rhs, cc), truev, falsev)
8120 // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
8121 SDValue LHS = CondV.getOperand(0);
8122 SDValue RHS = CondV.getOperand(1);
8123 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
8124
8125 // Special case for a select of 2 constants that have a diffence of 1.
8126 // Normally this is done by DAGCombine, but if the select is introduced by
8127 // type legalization or op legalization, we miss it. Restricting to SETLT
8128 // case for now because that is what signed saturating add/sub need.
8129 // FIXME: We don't need the condition to be SETLT or even a SETCC,
8130 // but we would probably want to swap the true/false values if the condition
8131 // is SETGE/SETLE to avoid an XORI.
8132 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
8133 CCVal == ISD::SETLT) {
8134 const APInt &TrueVal = TrueV->getAsAPIntVal();
8135 const APInt &FalseVal = FalseV->getAsAPIntVal();
8136 if (TrueVal - 1 == FalseVal)
8137 return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV);
8138 if (TrueVal + 1 == FalseVal)
8139 return DAG.getNode(ISD::SUB, DL, VT, FalseV, CondV);
8140 }
8141
8142 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
8143 // 1 < x ? x : 1 -> 0 < x ? x : 1
8144 if (isOneConstant(LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) &&
8145 RHS == TrueV && LHS == FalseV) {
8146 LHS = DAG.getConstant(0, DL, VT);
8147 // 0 <u x is the same as x != 0.
8148 if (CCVal == ISD::SETULT) {
8149 std::swap(LHS, RHS);
8150 CCVal = ISD::SETNE;
8151 }
8152 }
8153
8154 // x <s -1 ? x : -1 -> x <s 0 ? x : -1
8155 if (isAllOnesConstant(RHS) && CCVal == ISD::SETLT && LHS == TrueV &&
8156 RHS == FalseV) {
8157 RHS = DAG.getConstant(0, DL, VT);
8158 }
8159
8160 SDValue TargetCC = DAG.getCondCode(CCVal);
8161
8162 if (isa<ConstantSDNode>(TrueV) && !isa<ConstantSDNode>(FalseV)) {
8163 // (select (setcc lhs, rhs, CC), constant, falsev)
8164 // -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
8165 std::swap(TrueV, FalseV);
8166 TargetCC = DAG.getCondCode(ISD::getSetCCInverse(CCVal, LHS.getValueType()));
8167 }
8168
8169 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
8170 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
8171}
8172
8173SDValue RISCVTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
8174 SDValue CondV = Op.getOperand(1);
8175 SDLoc DL(Op);
8176 MVT XLenVT = Subtarget.getXLenVT();
8177
8178 if (CondV.getOpcode() == ISD::SETCC &&
8179 CondV.getOperand(0).getValueType() == XLenVT) {
8180 SDValue LHS = CondV.getOperand(0);
8181 SDValue RHS = CondV.getOperand(1);
8182 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
8183
8184 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
8185
8186 SDValue TargetCC = DAG.getCondCode(CCVal);
8187 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
8188 LHS, RHS, TargetCC, Op.getOperand(2));
8189 }
8190
8191 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
8192 CondV, DAG.getConstant(0, DL, XLenVT),
8193 DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
8194}
8195
8196SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
8199
8200 SDLoc DL(Op);
8201 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
8203
8204 // vastart just stores the address of the VarArgsFrameIndex slot into the
8205 // memory location argument.
8206 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
8207 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
8208 MachinePointerInfo(SV));
8209}
8210
8211SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
8212 SelectionDAG &DAG) const {
8213 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
8215 MachineFrameInfo &MFI = MF.getFrameInfo();
8216 MFI.setFrameAddressIsTaken(true);
8217 Register FrameReg = RI.getFrameRegister(MF);
8218 int XLenInBytes = Subtarget.getXLen() / 8;
8219
8220 EVT VT = Op.getValueType();
8221 SDLoc DL(Op);
8222 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
8223 unsigned Depth = Op.getConstantOperandVal(0);
8224 while (Depth--) {
8225 int Offset = -(XLenInBytes * 2);
8226 SDValue Ptr = DAG.getNode(
8227 ISD::ADD, DL, VT, FrameAddr,
8229 FrameAddr =
8230 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
8231 }
8232 return FrameAddr;
8233}
8234
8235SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
8236 SelectionDAG &DAG) const {
8237 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
8239 MachineFrameInfo &MFI = MF.getFrameInfo();
8240 MFI.setReturnAddressIsTaken(true);
8241 MVT XLenVT = Subtarget.getXLenVT();
8242 int XLenInBytes = Subtarget.getXLen() / 8;
8243
8245 return SDValue();
8246
8247 EVT VT = Op.getValueType();
8248 SDLoc DL(Op);
8249 unsigned Depth = Op.getConstantOperandVal(0);
8250 if (Depth) {
8251 int Off = -XLenInBytes;
8252 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
8253 SDValue Offset = DAG.getSignedConstant(Off, DL, VT);
8254 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
8255 DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
8257 }
8258
8259 // Return the value of the return address register, marking it an implicit
8260 // live-in.
8261 Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
8262 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
8263}
8264
8265SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
8266 SelectionDAG &DAG) const {
8267 SDLoc DL(Op);
8268 SDValue Lo = Op.getOperand(0);
8269 SDValue Hi = Op.getOperand(1);
8270 SDValue Shamt = Op.getOperand(2);
8271 EVT VT = Lo.getValueType();
8272
8273 // if Shamt-XLEN < 0: // Shamt < XLEN
8274 // Lo = Lo << Shamt
8275 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt))
8276 // else:
8277 // Lo = 0
8278 // Hi = Lo << (Shamt-XLEN)
8279
8280 SDValue Zero = DAG.getConstant(0, DL, VT);
8281 SDValue One = DAG.getConstant(1, DL, VT);
8282 SDValue MinusXLen = DAG.getSignedConstant(-(int)Subtarget.getXLen(), DL, VT);
8283 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
8284 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
8285 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
8286
8287 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
8288 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
8289 SDValue ShiftRightLo =
8290 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt);
8291 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
8292 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
8293 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen);
8294
8295 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
8296
8297 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
8298 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
8299
8300 SDValue Parts[2] = {Lo, Hi};
8301 return DAG.getMergeValues(Parts, DL);
8302}
8303
8304SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
8305 bool IsSRA) const {
8306 SDLoc DL(Op);
8307 SDValue Lo = Op.getOperand(0);
8308 SDValue Hi = Op.getOperand(1);
8309 SDValue Shamt = Op.getOperand(2);
8310 EVT VT = Lo.getValueType();
8311
8312 // SRA expansion:
8313 // if Shamt-XLEN < 0: // Shamt < XLEN
8314 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
8315 // Hi = Hi >>s Shamt
8316 // else:
8317 // Lo = Hi >>s (Shamt-XLEN);
8318 // Hi = Hi >>s (XLEN-1)
8319 //
8320 // SRL expansion:
8321 // if Shamt-XLEN < 0: // Shamt < XLEN
8322 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
8323 // Hi = Hi >>u Shamt
8324 // else:
8325 // Lo = Hi >>u (Shamt-XLEN);
8326 // Hi = 0;
8327
8328 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
8329
8330 SDValue Zero = DAG.getConstant(0, DL, VT);
8331 SDValue One = DAG.getConstant(1, DL, VT);
8332 SDValue MinusXLen = DAG.getSignedConstant(-(int)Subtarget.getXLen(), DL, VT);
8333 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
8334 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
8335 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
8336
8337 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
8338 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
8339 SDValue ShiftLeftHi =
8340 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt);
8341 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
8342 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
8343 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen);
8344 SDValue HiFalse =
8345 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero;
8346
8347 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
8348
8349 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
8350 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
8351
8352 SDValue Parts[2] = {Lo, Hi};
8353 return DAG.getMergeValues(Parts, DL);
8354}
8355
8356// Lower splats of i1 types to SETCC. For each mask vector type, we have a
8357// legal equivalently-sized i8 type, so we can use that as a go-between.
8358SDValue RISCVTargetLowering::lowerVectorMaskSplat(SDValue Op,
8359 SelectionDAG &DAG) const {
8360 SDLoc DL(Op);
8361 MVT VT = Op.getSimpleValueType();
8362 SDValue SplatVal = Op.getOperand(0);
8363 // All-zeros or all-ones splats are handled specially.
8364 if (ISD::isConstantSplatVectorAllOnes(Op.getNode())) {
8365 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
8366 return DAG.getNode(RISCVISD::VMSET_VL, DL, VT, VL);
8367 }
8368 if (ISD::isConstantSplatVectorAllZeros(Op.getNode())) {
8369 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
8370 return DAG.getNode(RISCVISD::VMCLR_VL, DL, VT, VL);
8371 }
8372 MVT InterVT = VT.changeVectorElementType(MVT::i8);
8373 SplatVal = DAG.getNode(ISD::AND, DL, SplatVal.getValueType(), SplatVal,
8374 DAG.getConstant(1, DL, SplatVal.getValueType()));
8375 SDValue LHS = DAG.getSplatVector(InterVT, DL, SplatVal);
8376 SDValue Zero = DAG.getConstant(0, DL, InterVT);
8377 return DAG.getSetCC(DL, VT, LHS, Zero, ISD::SETNE);
8378}
8379
8380// Custom-lower a SPLAT_VECTOR_PARTS where XLEN<SEW, as the SEW element type is
8381// illegal (currently only vXi64 RV32).
8382// FIXME: We could also catch non-constant sign-extended i32 values and lower
8383// them to VMV_V_X_VL.
8384SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op,
8385 SelectionDAG &DAG) const {
8386 SDLoc DL(Op);
8387 MVT VecVT = Op.getSimpleValueType();
8388 assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 &&
8389 "Unexpected SPLAT_VECTOR_PARTS lowering");
8390
8391 assert(Op.getNumOperands() == 2 && "Unexpected number of operands!");
8392 SDValue Lo = Op.getOperand(0);
8393 SDValue Hi = Op.getOperand(1);
8394
8395 MVT ContainerVT = VecVT;
8396 if (VecVT.isFixedLengthVector())
8397 ContainerVT = getContainerForFixedLengthVector(VecVT);
8398
8399 auto VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
8400
8401 SDValue Res =
8402 splatPartsI64WithVL(DL, ContainerVT, SDValue(), Lo, Hi, VL, DAG);
8403
8404 if (VecVT.isFixedLengthVector())
8405 Res = convertFromScalableVector(VecVT, Res, DAG, Subtarget);
8406
8407 return Res;
8408}
8409
8410// Custom-lower extensions from mask vectors by using a vselect either with 1
8411// for zero/any-extension or -1 for sign-extension:
8412// (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
8413// Note that any-extension is lowered identically to zero-extension.
8414SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
8415 int64_t ExtTrueVal) const {
8416 SDLoc DL(Op);
8417 MVT VecVT = Op.getSimpleValueType();
8418 SDValue Src = Op.getOperand(0);
8419 // Only custom-lower extensions from mask types
8420 assert(Src.getValueType().isVector() &&
8421 Src.getValueType().getVectorElementType() == MVT::i1);
8422
8423 if (VecVT.isScalableVector()) {
8424 SDValue SplatZero = DAG.getConstant(0, DL, VecVT);
8425 SDValue SplatTrueVal = DAG.getSignedConstant(ExtTrueVal, DL, VecVT);
8426 return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero);
8427 }
8428
8429 MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
8430 MVT I1ContainerVT =
8431 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
8432
8433 SDValue CC = convertToScalableVector(I1ContainerVT, Src, DAG, Subtarget);
8434
8435 SDValue VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
8436
8437 MVT XLenVT = Subtarget.getXLenVT();
8438 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
8439 SDValue SplatTrueVal = DAG.getSignedConstant(ExtTrueVal, DL, XLenVT);
8440
8441 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
8442 DAG.getUNDEF(ContainerVT), SplatZero, VL);
8443 SplatTrueVal = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
8444 DAG.getUNDEF(ContainerVT), SplatTrueVal, VL);
8445 SDValue Select =
8446 DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, CC, SplatTrueVal,
8447 SplatZero, DAG.getUNDEF(ContainerVT), VL);
8448
8449 return convertFromScalableVector(VecVT, Select, DAG, Subtarget);
8450}
8451
8452SDValue RISCVTargetLowering::lowerFixedLengthVectorExtendToRVV(
8453 SDValue Op, SelectionDAG &DAG, unsigned ExtendOpc) const {
8454 MVT ExtVT = Op.getSimpleValueType();
8455 // Only custom-lower extensions from fixed-length vector types.
8456 if (!ExtVT.isFixedLengthVector())
8457 return Op;
8458 MVT VT = Op.getOperand(0).getSimpleValueType();
8459 // Grab the canonical container type for the extended type. Infer the smaller
8460 // type from that to ensure the same number of vector elements, as we know
8461 // the LMUL will be sufficient to hold the smaller type.
8462 MVT ContainerExtVT = getContainerForFixedLengthVector(ExtVT);
8463 // Get the extended container type manually to ensure the same number of
8464 // vector elements between source and dest.
8465 MVT ContainerVT = MVT::getVectorVT(VT.getVectorElementType(),
8466 ContainerExtVT.getVectorElementCount());
8467
8468 SDValue Op1 =
8469 convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
8470
8471 SDLoc DL(Op);
8472 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
8473
8474 SDValue Ext = DAG.getNode(ExtendOpc, DL, ContainerExtVT, Op1, Mask, VL);
8475
8476 return convertFromScalableVector(ExtVT, Ext, DAG, Subtarget);
8477}
8478
// Custom-lower truncations from vectors to mask vectors by using a mask and a
// setcc operation:
// (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne)
// Handles both plain TRUNCATE and VP_TRUNCATE (which carries its own mask and
// VL operands).
SDValue RISCVTargetLowering::lowerVectorMaskTruncLike(SDValue Op,
                                                      SelectionDAG &DAG) const {
  bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE;
  SDLoc DL(Op);
  EVT MaskVT = Op.getValueType();
  // Only expect to custom-lower truncations to mask types
  assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 &&
         "Unexpected type for vector mask lowering");
  SDValue Src = Op.getOperand(0);
  MVT VecVT = Src.getSimpleValueType();
  SDValue Mask, VL;
  // VP truncates supply an explicit mask and VL; otherwise defaults are
  // computed below once the container type is known.
  if (IsVPTrunc) {
    Mask = Op.getOperand(1);
    VL = Op.getOperand(2);
  }
  // If this is a fixed vector, we need to convert it to a scalable vector.
  MVT ContainerVT = VecVT;

  if (VecVT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VecVT);
    Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
    if (IsVPTrunc) {
      // The VP mask operand must also be widened into its container type.
      MVT MaskContainerVT =
          getContainerForFixedLengthVector(Mask.getSimpleValueType());
      Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
    }
  }

  if (!IsVPTrunc) {
    std::tie(Mask, VL) =
        getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
  }

  // Splat the scalar constants 1 and 0 so we can AND with 1 and compare
  // against 0.
  SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT());
  SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());

  SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
                         DAG.getUNDEF(ContainerVT), SplatOne, VL);
  SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
                          DAG.getUNDEF(ContainerVT), SplatZero, VL);

  // (and vec, 1) != 0 computed with masked VL ops, then converted back to the
  // original fixed-length mask type if needed.
  MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
  SDValue Trunc = DAG.getNode(RISCVISD::AND_VL, DL, ContainerVT, Src, SplatOne,
                              DAG.getUNDEF(ContainerVT), Mask, VL);
  Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT,
                      {Trunc, SplatZero, DAG.getCondCode(ISD::SETNE),
                       DAG.getUNDEF(MaskContainerVT), Mask, VL});
  if (MaskVT.isFixedLengthVector())
    Trunc = convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget);
  return Trunc;
}
8533
8534SDValue RISCVTargetLowering::lowerVectorTruncLike(SDValue Op,
8535 SelectionDAG &DAG) const {
8536 unsigned Opc = Op.getOpcode();
8537 bool IsVPTrunc = Opc == ISD::VP_TRUNCATE;
8538 SDLoc DL(Op);
8539
8540 MVT VT = Op.getSimpleValueType();
8541 // Only custom-lower vector truncates
8542 assert(VT.isVector() && "Unexpected type for vector truncate lowering");
8543
8544 // Truncates to mask types are handled differently
8545 if (VT.getVectorElementType() == MVT::i1)
8546 return lowerVectorMaskTruncLike(Op, DAG);
8547
8548 // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
8549 // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which
8550 // truncate by one power of two at a time.
8551 MVT DstEltVT = VT.getVectorElementType();
8552
8553 SDValue Src = Op.getOperand(0);
8554 MVT SrcVT = Src.getSimpleValueType();
8555 MVT SrcEltVT = SrcVT.getVectorElementType();
8556
8557 assert(DstEltVT.bitsLT(SrcEltVT) && isPowerOf2_64(DstEltVT.getSizeInBits()) &&
8558 isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
8559 "Unexpected vector truncate lowering");
8560
8561 MVT ContainerVT = SrcVT;
8562 SDValue Mask, VL;
8563 if (IsVPTrunc) {
8564 Mask = Op.getOperand(1);
8565 VL = Op.getOperand(2);
8566 }
8567 if (SrcVT.isFixedLengthVector()) {
8568 ContainerVT = getContainerForFixedLengthVector(SrcVT);
8569 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
8570 if (IsVPTrunc) {
8571 MVT MaskVT = getMaskTypeFor(ContainerVT);
8572 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
8573 }
8574 }
8575
8576 SDValue Result = Src;
8577 if (!IsVPTrunc) {
8578 std::tie(Mask, VL) =
8579 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
8580 }
8581
8582 unsigned NewOpc;
8583 if (Opc == ISD::TRUNCATE_SSAT_S)
8585 else if (Opc == ISD::TRUNCATE_USAT_U)
8587 else
8589
8590 do {
8591 SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
8592 MVT ResultVT = ContainerVT.changeVectorElementType(SrcEltVT);
8593 Result = DAG.getNode(NewOpc, DL, ResultVT, Result, Mask, VL);
8594 } while (SrcEltVT != DstEltVT);
8595
8596 if (SrcVT.isFixedLengthVector())
8597 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
8598
8599 return Result;
8600}
8601
8602SDValue
8603RISCVTargetLowering::lowerStrictFPExtendOrRoundLike(SDValue Op,
8604 SelectionDAG &DAG) const {
8605 SDLoc DL(Op);
8606 SDValue Chain = Op.getOperand(0);
8607 SDValue Src = Op.getOperand(1);
8608 MVT VT = Op.getSimpleValueType();
8609 MVT SrcVT = Src.getSimpleValueType();
8610 MVT ContainerVT = VT;
8611 if (VT.isFixedLengthVector()) {
8612 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
8613 ContainerVT =
8614 SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
8615 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
8616 }
8617
8618 auto [Mask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
8619
8620 // RVV can only widen/truncate fp to types double/half the size as the source.
8621 if ((VT.getVectorElementType() == MVT::f64 &&
8622 (SrcVT.getVectorElementType() == MVT::f16 ||
8623 SrcVT.getVectorElementType() == MVT::bf16)) ||
8624 ((VT.getVectorElementType() == MVT::f16 ||
8625 VT.getVectorElementType() == MVT::bf16) &&
8626 SrcVT.getVectorElementType() == MVT::f64)) {
8627 // For double rounding, the intermediate rounding should be round-to-odd.
8628 unsigned InterConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
8631 MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
8632 Src = DAG.getNode(InterConvOpc, DL, DAG.getVTList(InterVT, MVT::Other),
8633 Chain, Src, Mask, VL);
8634 Chain = Src.getValue(1);
8635 }
8636
8637 unsigned ConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
8640 SDValue Res = DAG.getNode(ConvOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
8641 Chain, Src, Mask, VL);
8642 if (VT.isFixedLengthVector()) {
8643 // StrictFP operations have two result values. Their lowered result should
8644 // have same result count.
8645 SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
8646 Res = DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
8647 }
8648 return Res;
8649}
8650
8651SDValue
8652RISCVTargetLowering::lowerVectorFPExtendOrRoundLike(SDValue Op,
8653 SelectionDAG &DAG) const {
8654 bool IsVP =
8655 Op.getOpcode() == ISD::VP_FP_ROUND || Op.getOpcode() == ISD::VP_FP_EXTEND;
8656 bool IsExtend =
8657 Op.getOpcode() == ISD::VP_FP_EXTEND || Op.getOpcode() == ISD::FP_EXTEND;
8658 // RVV can only do truncate fp to types half the size as the source. We
8659 // custom-lower f64->f16 rounds via RVV's round-to-odd float
8660 // conversion instruction.
8661 SDLoc DL(Op);
8662 MVT VT = Op.getSimpleValueType();
8663
8664 assert(VT.isVector() && "Unexpected type for vector truncate lowering");
8665
8666 SDValue Src = Op.getOperand(0);
8667 MVT SrcVT = Src.getSimpleValueType();
8668
8669 bool IsDirectExtend =
8670 IsExtend && (VT.getVectorElementType() != MVT::f64 ||
8671 (SrcVT.getVectorElementType() != MVT::f16 &&
8672 SrcVT.getVectorElementType() != MVT::bf16));
8673 bool IsDirectTrunc = !IsExtend && ((VT.getVectorElementType() != MVT::f16 &&
8674 VT.getVectorElementType() != MVT::bf16) ||
8675 SrcVT.getVectorElementType() != MVT::f64);
8676
8677 bool IsDirectConv = IsDirectExtend || IsDirectTrunc;
8678
8679 // Prepare any fixed-length vector operands.
8680 MVT ContainerVT = VT;
8681 SDValue Mask, VL;
8682 if (IsVP) {
8683 Mask = Op.getOperand(1);
8684 VL = Op.getOperand(2);
8685 }
8686 if (VT.isFixedLengthVector()) {
8687 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
8688 ContainerVT =
8689 SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
8690 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
8691 if (IsVP) {
8692 MVT MaskVT = getMaskTypeFor(ContainerVT);
8693 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
8694 }
8695 }
8696
8697 if (!IsVP)
8698 std::tie(Mask, VL) =
8699 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
8700
8701 unsigned ConvOpc = IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::FP_ROUND_VL;
8702
8703 if (IsDirectConv) {
8704 Src = DAG.getNode(ConvOpc, DL, ContainerVT, Src, Mask, VL);
8705 if (VT.isFixedLengthVector())
8706 Src = convertFromScalableVector(VT, Src, DAG, Subtarget);
8707 return Src;
8708 }
8709
8710 unsigned InterConvOpc =
8712
8713 MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
8714 SDValue IntermediateConv =
8715 DAG.getNode(InterConvOpc, DL, InterVT, Src, Mask, VL);
8716 SDValue Result =
8717 DAG.getNode(ConvOpc, DL, ContainerVT, IntermediateConv, Mask, VL);
8718 if (VT.isFixedLengthVector())
8719 return convertFromScalableVector(VT, Result, DAG, Subtarget);
8720 return Result;
8721}
8722
8723// Given a scalable vector type and an index into it, returns the type for the
8724// smallest subvector that the index fits in. This can be used to reduce LMUL
8725// for operations like vslidedown.
8726//
8727// E.g. With Zvl128b, index 3 in a nxv4i32 fits within the first nxv2i32.
8728static std::optional<MVT>
8729getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG,
8730 const RISCVSubtarget &Subtarget) {
8731 assert(VecVT.isScalableVector());
8732 const unsigned EltSize = VecVT.getScalarSizeInBits();
8733 const unsigned VectorBitsMin = Subtarget.getRealMinVLen();
8734 const unsigned MinVLMAX = VectorBitsMin / EltSize;
8735 MVT SmallerVT;
8736 if (MaxIdx < MinVLMAX)
8737 SmallerVT = getLMUL1VT(VecVT);
8738 else if (MaxIdx < MinVLMAX * 2)
8739 SmallerVT = getLMUL1VT(VecVT).getDoubleNumVectorElementsVT();
8740 else if (MaxIdx < MinVLMAX * 4)
8741 SmallerVT = getLMUL1VT(VecVT)
8744 if (!SmallerVT.isValid() || !VecVT.bitsGT(SmallerVT))
8745 return std::nullopt;
8746 return SmallerVT;
8747}
8748
8749// Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the
8750// first position of a vector, and that vector is slid up to the insert index.
8751// By limiting the active vector length to index+1 and merging with the
8752// original vector (with an undisturbed tail policy for elements >= VL), we
8753// achieve the desired result of leaving all elements untouched except the one
8754// at VL-1, which is replaced with the desired value.
8755SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
8756 SelectionDAG &DAG) const {
8757 SDLoc DL(Op);
8758 MVT VecVT = Op.getSimpleValueType();
8759 SDValue Vec = Op.getOperand(0);
8760 SDValue Val = Op.getOperand(1);
8761 SDValue Idx = Op.getOperand(2);
8762
8763 if (VecVT.getVectorElementType() == MVT::i1) {
8764 // FIXME: For now we just promote to an i8 vector and insert into that,
8765 // but this is probably not optimal.
8766 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
8767 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
8768 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideVT, Vec, Val, Idx);
8769 return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Vec);
8770 }
8771
8772 MVT ContainerVT = VecVT;
8773 // If the operand is a fixed-length vector, convert to a scalable one.
8774 if (VecVT.isFixedLengthVector()) {
8775 ContainerVT = getContainerForFixedLengthVector(VecVT);
8776 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
8777 }
8778
8779 // If we know the index we're going to insert at, we can shrink Vec so that
8780 // we're performing the scalar inserts and slideup on a smaller LMUL.
8781 MVT OrigContainerVT = ContainerVT;
8782 SDValue OrigVec = Vec;
8783 SDValue AlignedIdx;
8784 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx)) {
8785 const unsigned OrigIdx = IdxC->getZExtValue();
8786 // Do we know an upper bound on LMUL?
8787 if (auto ShrunkVT = getSmallestVTForIndex(ContainerVT, OrigIdx,
8788 DL, DAG, Subtarget)) {
8789 ContainerVT = *ShrunkVT;
8790 AlignedIdx = DAG.getVectorIdxConstant(0, DL);
8791 }
8792
8793 // If we're compiling for an exact VLEN value, we can always perform
8794 // the insert in m1 as we can determine the register corresponding to
8795 // the index in the register group.
8796 const MVT M1VT = getLMUL1VT(ContainerVT);
8797 if (auto VLEN = Subtarget.getRealVLen();
8798 VLEN && ContainerVT.bitsGT(M1VT)) {
8799 EVT ElemVT = VecVT.getVectorElementType();
8800 unsigned ElemsPerVReg = *VLEN / ElemVT.getFixedSizeInBits();
8801 unsigned RemIdx = OrigIdx % ElemsPerVReg;
8802 unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
8803 unsigned ExtractIdx =
8804 SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
8805 AlignedIdx = DAG.getVectorIdxConstant(ExtractIdx, DL);
8806 Idx = DAG.getVectorIdxConstant(RemIdx, DL);
8807 ContainerVT = M1VT;
8808 }
8809
8810 if (AlignedIdx)
8811 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
8812 AlignedIdx);
8813 }
8814
8815 MVT XLenVT = Subtarget.getXLenVT();
8816
8817 bool IsLegalInsert = Subtarget.is64Bit() || Val.getValueType() != MVT::i64;
8818 // Even i64-element vectors on RV32 can be lowered without scalar
8819 // legalization if the most-significant 32 bits of the value are not affected
8820 // by the sign-extension of the lower 32 bits.
8821 // TODO: We could also catch sign extensions of a 32-bit value.
8822 if (!IsLegalInsert && isa<ConstantSDNode>(Val)) {
8823 const auto *CVal = cast<ConstantSDNode>(Val);
8824 if (isInt<32>(CVal->getSExtValue())) {
8825 IsLegalInsert = true;
8826 Val = DAG.getSignedConstant(CVal->getSExtValue(), DL, MVT::i32);
8827 }
8828 }
8829
8830 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
8831
8832 SDValue ValInVec;
8833
8834 if (IsLegalInsert) {
8835 unsigned Opc =
8837 if (isNullConstant(Idx)) {
8838 if (!VecVT.isFloatingPoint())
8839 Val = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Val);
8840 Vec = DAG.getNode(Opc, DL, ContainerVT, Vec, Val, VL);
8841
8842 if (AlignedIdx)
8843 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
8844 Vec, AlignedIdx);
8845 if (!VecVT.isFixedLengthVector())
8846 return Vec;
8847 return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
8848 }
8849 ValInVec = lowerScalarInsert(Val, VL, ContainerVT, DL, DAG, Subtarget);
8850 } else {
8851 // On RV32, i64-element vectors must be specially handled to place the
8852 // value at element 0, by using two vslide1down instructions in sequence on
8853 // the i32 split lo/hi value. Use an equivalently-sized i32 vector for
8854 // this.
8855 SDValue ValLo, ValHi;
8856 std::tie(ValLo, ValHi) = DAG.SplitScalar(Val, DL, MVT::i32, MVT::i32);
8857 MVT I32ContainerVT =
8858 MVT::getVectorVT(MVT::i32, ContainerVT.getVectorElementCount() * 2);
8859 SDValue I32Mask =
8860 getDefaultScalableVLOps(I32ContainerVT, DL, DAG, Subtarget).first;
8861 // Limit the active VL to two.
8862 SDValue InsertI64VL = DAG.getConstant(2, DL, XLenVT);
8863 // If the Idx is 0 we can insert directly into the vector.
8864 if (isNullConstant(Idx)) {
8865 // First slide in the lo value, then the hi in above it. We use slide1down
8866 // to avoid the register group overlap constraint of vslide1up.
8867 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
8868 Vec, Vec, ValLo, I32Mask, InsertI64VL);
8869 // If the source vector is undef don't pass along the tail elements from
8870 // the previous slide1down.
8871 SDValue Tail = Vec.isUndef() ? Vec : ValInVec;
8872 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
8873 Tail, ValInVec, ValHi, I32Mask, InsertI64VL);
8874 // Bitcast back to the right container type.
8875 ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
8876
8877 if (AlignedIdx)
8878 ValInVec =
8879 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
8880 ValInVec, AlignedIdx);
8881 if (!VecVT.isFixedLengthVector())
8882 return ValInVec;
8883 return convertFromScalableVector(VecVT, ValInVec, DAG, Subtarget);
8884 }
8885
8886 // First slide in the lo value, then the hi in above it. We use slide1down
8887 // to avoid the register group overlap constraint of vslide1up.
8888 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
8889 DAG.getUNDEF(I32ContainerVT),
8890 DAG.getUNDEF(I32ContainerVT), ValLo,
8891 I32Mask, InsertI64VL);
8892 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
8893 DAG.getUNDEF(I32ContainerVT), ValInVec, ValHi,
8894 I32Mask, InsertI64VL);
8895 // Bitcast back to the right container type.
8896 ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
8897 }
8898
8899 // Now that the value is in a vector, slide it into position.
8900 SDValue InsertVL =
8901 DAG.getNode(ISD::ADD, DL, XLenVT, Idx, DAG.getConstant(1, DL, XLenVT));
8902
8903 // Use tail agnostic policy if Idx is the last index of Vec.
8905 if (VecVT.isFixedLengthVector() && isa<ConstantSDNode>(Idx) &&
8906 Idx->getAsZExtVal() + 1 == VecVT.getVectorNumElements())
8907 Policy = RISCVII::TAIL_AGNOSTIC;
8908 SDValue Slideup = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, ValInVec,
8909 Idx, Mask, InsertVL, Policy);
8910
8911 if (AlignedIdx)
8912 Slideup = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
8913 Slideup, AlignedIdx);
8914 if (!VecVT.isFixedLengthVector())
8915 return Slideup;
8916 return convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
8917}
8918
8919// Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then
8920// extract the first element: (extractelt (slidedown vec, idx), 0). For integer
8921// types this is done using VMV_X_S to allow us to glean information about the
8922// sign bits of the result.
8923SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
8924 SelectionDAG &DAG) const {
8925 SDLoc DL(Op);
8926 SDValue Idx = Op.getOperand(1);
8927 SDValue Vec = Op.getOperand(0);
8928 EVT EltVT = Op.getValueType();
8929 MVT VecVT = Vec.getSimpleValueType();
8930 MVT XLenVT = Subtarget.getXLenVT();
8931
8932 if (VecVT.getVectorElementType() == MVT::i1) {
8933 // Use vfirst.m to extract the first bit.
8934 if (isNullConstant(Idx)) {
8935 MVT ContainerVT = VecVT;
8936 if (VecVT.isFixedLengthVector()) {
8937 ContainerVT = getContainerForFixedLengthVector(VecVT);
8938 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
8939 }
8940 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
8941 SDValue Vfirst =
8942 DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Vec, Mask, VL);
8943 SDValue Res = DAG.getSetCC(DL, XLenVT, Vfirst,
8944 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
8945 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
8946 }
8947 if (VecVT.isFixedLengthVector()) {
8948 unsigned NumElts = VecVT.getVectorNumElements();
8949 if (NumElts >= 8) {
8950 MVT WideEltVT;
8951 unsigned WidenVecLen;
8952 SDValue ExtractElementIdx;
8953 SDValue ExtractBitIdx;
8954 unsigned MaxEEW = Subtarget.getELen();
8955 MVT LargestEltVT = MVT::getIntegerVT(
8956 std::min(MaxEEW, unsigned(XLenVT.getSizeInBits())));
8957 if (NumElts <= LargestEltVT.getSizeInBits()) {
8958 assert(isPowerOf2_32(NumElts) &&
8959 "the number of elements should be power of 2");
8960 WideEltVT = MVT::getIntegerVT(NumElts);
8961 WidenVecLen = 1;
8962 ExtractElementIdx = DAG.getConstant(0, DL, XLenVT);
8963 ExtractBitIdx = Idx;
8964 } else {
8965 WideEltVT = LargestEltVT;
8966 WidenVecLen = NumElts / WideEltVT.getSizeInBits();
8967 // extract element index = index / element width
8968 ExtractElementIdx = DAG.getNode(
8969 ISD::SRL, DL, XLenVT, Idx,
8970 DAG.getConstant(Log2_64(WideEltVT.getSizeInBits()), DL, XLenVT));
8971 // mask bit index = index % element width
8972 ExtractBitIdx = DAG.getNode(
8973 ISD::AND, DL, XLenVT, Idx,
8974 DAG.getConstant(WideEltVT.getSizeInBits() - 1, DL, XLenVT));
8975 }
8976 MVT WideVT = MVT::getVectorVT(WideEltVT, WidenVecLen);
8977 Vec = DAG.getNode(ISD::BITCAST, DL, WideVT, Vec);
8978 SDValue ExtractElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, XLenVT,
8979 Vec, ExtractElementIdx);
8980 // Extract the bit from GPR.
8981 SDValue ShiftRight =
8982 DAG.getNode(ISD::SRL, DL, XLenVT, ExtractElt, ExtractBitIdx);
8983 SDValue Res = DAG.getNode(ISD::AND, DL, XLenVT, ShiftRight,
8984 DAG.getConstant(1, DL, XLenVT));
8985 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
8986 }
8987 }
8988 // Otherwise, promote to an i8 vector and extract from that.
8989 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
8990 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
8991 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, Idx);
8992 }
8993
8994 if ((EltVT == MVT::f16 && !Subtarget.hasVInstructionsF16()) ||
8995 EltVT == MVT::bf16) {
8996 // If we don't have vfmv.f.s for f16/bf16, extract to a gpr then use fmv.h.x
8997 MVT IntVT = VecVT.changeTypeToInteger();
8998 SDValue IntVec = DAG.getBitcast(IntVT, Vec);
8999 SDValue IntExtract =
9000 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, XLenVT, IntVec, Idx);
9001 return DAG.getNode(RISCVISD::FMV_H_X, DL, EltVT, IntExtract);
9002 }
9003
9004 // If this is a fixed vector, we need to convert it to a scalable vector.
9005 MVT ContainerVT = VecVT;
9006 if (VecVT.isFixedLengthVector()) {
9007 ContainerVT = getContainerForFixedLengthVector(VecVT);
9008 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9009 }
9010
9011 // If we're compiling for an exact VLEN value and we have a known
9012 // constant index, we can always perform the extract in m1 (or
9013 // smaller) as we can determine the register corresponding to
9014 // the index in the register group.
9015 const auto VLen = Subtarget.getRealVLen();
9016 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx);
9017 IdxC && VLen && VecVT.getSizeInBits().getKnownMinValue() > *VLen) {
9018 MVT M1VT = getLMUL1VT(ContainerVT);
9019 unsigned OrigIdx = IdxC->getZExtValue();
9020 EVT ElemVT = VecVT.getVectorElementType();
9021 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
9022 unsigned RemIdx = OrigIdx % ElemsPerVReg;
9023 unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
9024 unsigned ExtractIdx =
9025 SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
9026 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Vec,
9027 DAG.getVectorIdxConstant(ExtractIdx, DL));
9028 Idx = DAG.getVectorIdxConstant(RemIdx, DL);
9029 ContainerVT = M1VT;
9030 }
9031
9032 // Reduce the LMUL of our slidedown and vmv.x.s to the smallest LMUL which
9033 // contains our index.
9034 std::optional<uint64_t> MaxIdx;
9035 if (VecVT.isFixedLengthVector())
9036 MaxIdx = VecVT.getVectorNumElements() - 1;
9037 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx))
9038 MaxIdx = IdxC->getZExtValue();
9039 if (MaxIdx) {
9040 if (auto SmallerVT =
9041 getSmallestVTForIndex(ContainerVT, *MaxIdx, DL, DAG, Subtarget)) {
9042 ContainerVT = *SmallerVT;
9043 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
9044 DAG.getConstant(0, DL, XLenVT));
9045 }
9046 }
9047
9048 // If after narrowing, the required slide is still greater than LMUL2,
9049 // fallback to generic expansion and go through the stack. This is done
9050 // for a subtle reason: extracting *all* elements out of a vector is
9051 // widely expected to be linear in vector size, but because vslidedown
9052 // is linear in LMUL, performing N extracts using vslidedown becomes
9053 // O(n^2) / (VLEN/ETYPE) work. On the surface, going through the stack
9054 // seems to have the same problem (the store is linear in LMUL), but the
9055 // generic expansion *memoizes* the store, and thus for many extracts of
9056 // the same vector we end up with one store and a bunch of loads.
9057 // TODO: We don't have the same code for insert_vector_elt because we
9058 // have BUILD_VECTOR and handle the degenerate case there. Should we
9059 // consider adding an inverse BUILD_VECTOR node?
9060 MVT LMUL2VT = getLMUL1VT(ContainerVT).getDoubleNumVectorElementsVT();
9061 if (ContainerVT.bitsGT(LMUL2VT) && VecVT.isFixedLengthVector())
9062 return SDValue();
9063
9064 // If the index is 0, the vector is already in the right position.
9065 if (!isNullConstant(Idx)) {
9066 // Use a VL of 1 to avoid processing more elements than we need.
9067 auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
9068 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
9069 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
9070 }
9071
9072 if (!EltVT.isInteger()) {
9073 // Floating-point extracts are handled in TableGen.
9074 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec,
9075 DAG.getVectorIdxConstant(0, DL));
9076 }
9077
9078 SDValue Elt0 = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
9079 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt0);
9080}
9081
9082// Some RVV intrinsics may claim that they want an integer operand to be
9083// promoted or expanded.
9085 const RISCVSubtarget &Subtarget) {
9086 assert((Op.getOpcode() == ISD::INTRINSIC_VOID ||
9087 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
9088 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
9089 "Unexpected opcode");
9090
9091 if (!Subtarget.hasVInstructions())
9092 return SDValue();
9093
9094 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
9095 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
9096 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
9097
9098 SDLoc DL(Op);
9099
9101 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
9102 if (!II || !II->hasScalarOperand())
9103 return SDValue();
9104
9105 unsigned SplatOp = II->ScalarOperand + 1 + HasChain;
9106 assert(SplatOp < Op.getNumOperands());
9107
9109 SDValue &ScalarOp = Operands[SplatOp];
9110 MVT OpVT = ScalarOp.getSimpleValueType();
9111 MVT XLenVT = Subtarget.getXLenVT();
9112
9113 // If this isn't a scalar, or its type is XLenVT we're done.
9114 if (!OpVT.isScalarInteger() || OpVT == XLenVT)
9115 return SDValue();
9116
9117 // Simplest case is that the operand needs to be promoted to XLenVT.
9118 if (OpVT.bitsLT(XLenVT)) {
9119 // If the operand is a constant, sign extend to increase our chances
9120 // of being able to use a .vi instruction. ANY_EXTEND would become a
9121 // a zero extend and the simm5 check in isel would fail.
9122 // FIXME: Should we ignore the upper bits in isel instead?
9123 unsigned ExtOpc =
9125 ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
9126 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
9127 }
9128
9129 // Use the previous operand to get the vXi64 VT. The result might be a mask
9130 // VT for compares. Using the previous operand assumes that the previous
9131 // operand will never have a smaller element size than a scalar operand and
9132 // that a widening operation never uses SEW=64.
9133 // NOTE: If this fails the below assert, we can probably just find the
9134 // element count from any operand or result and use it to construct the VT.
9135 assert(II->ScalarOperand > 0 && "Unexpected splat operand!");
9136 MVT VT = Op.getOperand(SplatOp - 1).getSimpleValueType();
9137
9138 // The more complex case is when the scalar is larger than XLenVT.
9139 assert(XLenVT == MVT::i32 && OpVT == MVT::i64 &&
9140 VT.getVectorElementType() == MVT::i64 && "Unexpected VTs!");
9141
9142 // If this is a sign-extended 32-bit value, we can truncate it and rely on the
9143 // instruction to sign-extend since SEW>XLEN.
9144 if (DAG.ComputeNumSignBits(ScalarOp) > 32) {
9145 ScalarOp = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, ScalarOp);
9146 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
9147 }
9148
9149 switch (IntNo) {
9150 case Intrinsic::riscv_vslide1up:
9151 case Intrinsic::riscv_vslide1down:
9152 case Intrinsic::riscv_vslide1up_mask:
9153 case Intrinsic::riscv_vslide1down_mask: {
9154 // We need to special case these when the scalar is larger than XLen.
9155 unsigned NumOps = Op.getNumOperands();
9156 bool IsMasked = NumOps == 7;
9157
9158 // Convert the vector source to the equivalent nxvXi32 vector.
9159 MVT I32VT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
9160 SDValue Vec = DAG.getBitcast(I32VT, Operands[2]);
9161 SDValue ScalarLo, ScalarHi;
9162 std::tie(ScalarLo, ScalarHi) =
9163 DAG.SplitScalar(ScalarOp, DL, MVT::i32, MVT::i32);
9164
9165 // Double the VL since we halved SEW.
9166 SDValue AVL = getVLOperand(Op);
9167 SDValue I32VL;
9168
9169 // Optimize for constant AVL
9170 if (isa<ConstantSDNode>(AVL)) {
9171 const auto [MinVLMAX, MaxVLMAX] =
9173
9174 uint64_t AVLInt = AVL->getAsZExtVal();
9175 if (AVLInt <= MinVLMAX) {
9176 I32VL = DAG.getConstant(2 * AVLInt, DL, XLenVT);
9177 } else if (AVLInt >= 2 * MaxVLMAX) {
9178 // Just set vl to VLMAX in this situation
9179 I32VL = DAG.getRegister(RISCV::X0, XLenVT);
9180 } else {
9181 // For AVL between (MinVLMAX, 2 * MaxVLMAX), the actual working vl
9182 // is related to the hardware implementation.
9183 // So let the following code handle
9184 }
9185 }
9186 if (!I32VL) {
9188 SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT);
9189 unsigned Sew = RISCVVType::encodeSEW(VT.getScalarSizeInBits());
9190 SDValue SEW = DAG.getConstant(Sew, DL, XLenVT);
9191 SDValue SETVL =
9192 DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, MVT::i32);
9193 // Using vsetvli instruction to get actually used length which related to
9194 // the hardware implementation
9195 SDValue VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVL, AVL,
9196 SEW, LMUL);
9197 I32VL =
9198 DAG.getNode(ISD::SHL, DL, XLenVT, VL, DAG.getConstant(1, DL, XLenVT));
9199 }
9200
9201 SDValue I32Mask = getAllOnesMask(I32VT, I32VL, DL, DAG);
9202
9203 // Shift the two scalar parts in using SEW=32 slide1up/slide1down
9204 // instructions.
9205 SDValue Passthru;
9206 if (IsMasked)
9207 Passthru = DAG.getUNDEF(I32VT);
9208 else
9209 Passthru = DAG.getBitcast(I32VT, Operands[1]);
9210
9211 if (IntNo == Intrinsic::riscv_vslide1up ||
9212 IntNo == Intrinsic::riscv_vslide1up_mask) {
9213 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
9214 ScalarHi, I32Mask, I32VL);
9215 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
9216 ScalarLo, I32Mask, I32VL);
9217 } else {
9218 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
9219 ScalarLo, I32Mask, I32VL);
9220 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
9221 ScalarHi, I32Mask, I32VL);
9222 }
9223
9224 // Convert back to nxvXi64.
9225 Vec = DAG.getBitcast(VT, Vec);
9226
9227 if (!IsMasked)
9228 return Vec;
9229 // Apply mask after the operation.
9230 SDValue Mask = Operands[NumOps - 3];
9231 SDValue MaskedOff = Operands[1];
9232 // Assume Policy operand is the last operand.
9233 uint64_t Policy = Operands[NumOps - 1]->getAsZExtVal();
9234 // We don't need to select maskedoff if it's undef.
9235 if (MaskedOff.isUndef())
9236 return Vec;
9237 // TAMU
9238 if (Policy == RISCVII::TAIL_AGNOSTIC)
9239 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff,
9240 DAG.getUNDEF(VT), AVL);
9241 // TUMA or TUMU: Currently we always emit tumu policy regardless of tuma.
9242 // It's fine because vmerge does not care mask policy.
9243 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff,
9244 MaskedOff, AVL);
9245 }
9246 }
9247
9248 // We need to convert the scalar to a splat vector.
9249 SDValue VL = getVLOperand(Op);
9250 assert(VL.getValueType() == XLenVT);
9251 ScalarOp = splatSplitI64WithVL(DL, VT, SDValue(), ScalarOp, VL, DAG);
9252 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
9253}
9254
9255// Lower the llvm.get.vector.length intrinsic to vsetvli. We only support
9256// scalable vector llvm.get.vector.length for now.
9257//
9258// We need to convert from a scalable VF to a vsetvli with VLMax equal to
9259// (vscale * VF). The vscale and VF are independent of element width. We use
9260// SEW=8 for the vsetvli because it is the only element width that supports all
9261// fractional LMULs. The LMUL is choosen so that with SEW=8 the VLMax is
9262// (vscale * VF). Where vscale is defined as VLEN/RVVBitsPerBlock. The
9263// InsertVSETVLI pass can fix up the vtype of the vsetvli if a different
9264// SEW and LMUL are better for the surrounding vector instructions.
9266 const RISCVSubtarget &Subtarget) {
9267 MVT XLenVT = Subtarget.getXLenVT();
9268
9269 // The smallest LMUL is only valid for the smallest element width.
9270 const unsigned ElementWidth = 8;
9271
9272 // Determine the VF that corresponds to LMUL 1 for ElementWidth.
9273 unsigned LMul1VF = RISCV::RVVBitsPerBlock / ElementWidth;
9274 // We don't support VF==1 with ELEN==32.
9275 [[maybe_unused]] unsigned MinVF =
9276 RISCV::RVVBitsPerBlock / Subtarget.getELen();
9277
9278 [[maybe_unused]] unsigned VF = N->getConstantOperandVal(2);
9279 assert(VF >= MinVF && VF <= (LMul1VF * 8) && isPowerOf2_32(VF) &&
9280 "Unexpected VF");
9281
9282 bool Fractional = VF < LMul1VF;
9283 unsigned LMulVal = Fractional ? LMul1VF / VF : VF / LMul1VF;
9284 unsigned VLMUL = (unsigned)RISCVVType::encodeLMUL(LMulVal, Fractional);
9285 unsigned VSEW = RISCVVType::encodeSEW(ElementWidth);
9286
9287 SDLoc DL(N);
9288
9289 SDValue LMul = DAG.getTargetConstant(VLMUL, DL, XLenVT);
9290 SDValue Sew = DAG.getTargetConstant(VSEW, DL, XLenVT);
9291
9292 SDValue AVL = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, N->getOperand(1));
9293
9294 SDValue ID = DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, XLenVT);
9295 SDValue Res =
9296 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, ID, AVL, Sew, LMul);
9297 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res);
9298}
9299
9301 const RISCVSubtarget &Subtarget) {
9302 SDValue Op0 = N->getOperand(1);
9303 MVT OpVT = Op0.getSimpleValueType();
9304 MVT ContainerVT = OpVT;
9305 if (OpVT.isFixedLengthVector()) {
9306 ContainerVT = getContainerForFixedLengthVector(DAG, OpVT, Subtarget);
9307 Op0 = convertToScalableVector(ContainerVT, Op0, DAG, Subtarget);
9308 }
9309 MVT XLenVT = Subtarget.getXLenVT();
9310 SDLoc DL(N);
9311 auto [Mask, VL] = getDefaultVLOps(OpVT, ContainerVT, DL, DAG, Subtarget);
9312 SDValue Res = DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Op0, Mask, VL);
9313 if (isOneConstant(N->getOperand(2)))
9314 return Res;
9315
9316 // Convert -1 to VL.
9317 SDValue Setcc =
9318 DAG.getSetCC(DL, XLenVT, Res, DAG.getConstant(0, DL, XLenVT), ISD::SETLT);
9319 VL = DAG.getElementCount(DL, XLenVT, OpVT.getVectorElementCount());
9320 return DAG.getSelect(DL, XLenVT, Setcc, VL, Res);
9321}
9322
9323static inline void promoteVCIXScalar(const SDValue &Op,
9325 SelectionDAG &DAG) {
9326 const RISCVSubtarget &Subtarget =
9328
9329 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
9330 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
9331 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
9332 SDLoc DL(Op);
9333
9335 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
9336 if (!II || !II->hasScalarOperand())
9337 return;
9338
9339 unsigned SplatOp = II->ScalarOperand + 1;
9340 assert(SplatOp < Op.getNumOperands());
9341
9342 SDValue &ScalarOp = Operands[SplatOp];
9343 MVT OpVT = ScalarOp.getSimpleValueType();
9344 MVT XLenVT = Subtarget.getXLenVT();
9345
9346 // The code below is partially copied from lowerVectorIntrinsicScalars.
9347 // If this isn't a scalar, or its type is XLenVT we're done.
9348 if (!OpVT.isScalarInteger() || OpVT == XLenVT)
9349 return;
9350
9351 // Manually emit promote operation for scalar operation.
9352 if (OpVT.bitsLT(XLenVT)) {
9353 unsigned ExtOpc =
9355 ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
9356 }
9357
9358 return;
9359}
9360
9361static void processVCIXOperands(SDValue &OrigOp,
9363 SelectionDAG &DAG) {
9364 promoteVCIXScalar(OrigOp, Operands, DAG);
9365 const RISCVSubtarget &Subtarget =
9367 for (SDValue &V : Operands) {
9368 EVT ValType = V.getValueType();
9369 if (ValType.isVector() && ValType.isFloatingPoint()) {
9370 MVT InterimIVT =
9371 MVT::getVectorVT(MVT::getIntegerVT(ValType.getScalarSizeInBits()),
9372 ValType.getVectorElementCount());
9373 V = DAG.getBitcast(InterimIVT, V);
9374 }
9375 if (ValType.isFixedLengthVector()) {
9376 MVT OpContainerVT = getContainerForFixedLengthVector(
9377 DAG, V.getSimpleValueType(), Subtarget);
9378 V = convertToScalableVector(OpContainerVT, V, DAG, Subtarget);
9379 }
9380 }
9381}
9382
9383// LMUL * VLEN should be greater than or equal to EGS * SEW
9384static inline bool isValidEGW(int EGS, EVT VT,
9385 const RISCVSubtarget &Subtarget) {
9386 return (Subtarget.getRealMinVLen() *
9388 EGS * VT.getScalarSizeInBits();
9389}
9390
9391SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
9392 SelectionDAG &DAG) const {
9393 unsigned IntNo = Op.getConstantOperandVal(0);
9394 SDLoc DL(Op);
9395 MVT XLenVT = Subtarget.getXLenVT();
9396
9397 switch (IntNo) {
9398 default:
9399 break; // Don't custom lower most intrinsics.
9400 case Intrinsic::riscv_tuple_insert: {
9401 SDValue Vec = Op.getOperand(1);
9402 SDValue SubVec = Op.getOperand(2);
9403 SDValue Index = Op.getOperand(3);
9404
9405 return DAG.getNode(RISCVISD::TUPLE_INSERT, DL, Op.getValueType(), Vec,
9406 SubVec, Index);
9407 }
9408 case Intrinsic::riscv_tuple_extract: {
9409 SDValue Vec = Op.getOperand(1);
9410 SDValue Index = Op.getOperand(2);
9411
9412 return DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL, Op.getValueType(), Vec,
9413 Index);
9414 }
9415 case Intrinsic::thread_pointer: {
9416 EVT PtrVT = getPointerTy(DAG.getDataLayout());
9417 return DAG.getRegister(RISCV::X4, PtrVT);
9418 }
9419 case Intrinsic::riscv_orc_b:
9420 case Intrinsic::riscv_brev8:
9421 case Intrinsic::riscv_sha256sig0:
9422 case Intrinsic::riscv_sha256sig1:
9423 case Intrinsic::riscv_sha256sum0:
9424 case Intrinsic::riscv_sha256sum1:
9425 case Intrinsic::riscv_sm3p0:
9426 case Intrinsic::riscv_sm3p1: {
9427 unsigned Opc;
9428 switch (IntNo) {
9429 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
9430 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
9431 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
9432 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
9433 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
9434 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
9435 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
9436 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
9437 }
9438
9439 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
9440 }
9441 case Intrinsic::riscv_sm4ks:
9442 case Intrinsic::riscv_sm4ed: {
9443 unsigned Opc =
9444 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
9445
9446 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2),
9447 Op.getOperand(3));
9448 }
9449 case Intrinsic::riscv_zip:
9450 case Intrinsic::riscv_unzip: {
9451 unsigned Opc =
9452 IntNo == Intrinsic::riscv_zip ? RISCVISD::ZIP : RISCVISD::UNZIP;
9453 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
9454 }
9455 case Intrinsic::riscv_mopr:
9456 return DAG.getNode(RISCVISD::MOPR, DL, XLenVT, Op.getOperand(1),
9457 Op.getOperand(2));
9458
9459 case Intrinsic::riscv_moprr: {
9460 return DAG.getNode(RISCVISD::MOPRR, DL, XLenVT, Op.getOperand(1),
9461 Op.getOperand(2), Op.getOperand(3));
9462 }
9463 case Intrinsic::riscv_clmul:
9464 return DAG.getNode(RISCVISD::CLMUL, DL, XLenVT, Op.getOperand(1),
9465 Op.getOperand(2));
9466 case Intrinsic::riscv_clmulh:
9467 case Intrinsic::riscv_clmulr: {
9468 unsigned Opc =
9469 IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH : RISCVISD::CLMULR;
9470 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
9471 }
9472 case Intrinsic::experimental_get_vector_length:
9473 return lowerGetVectorLength(Op.getNode(), DAG, Subtarget);
9474 case Intrinsic::experimental_cttz_elts:
9475 return lowerCttzElts(Op.getNode(), DAG, Subtarget);
9476 case Intrinsic::riscv_vmv_x_s: {
9477 SDValue Res = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Op.getOperand(1));
9478 return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res);
9479 }
9480 case Intrinsic::riscv_vfmv_f_s:
9481 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
9482 Op.getOperand(1), DAG.getVectorIdxConstant(0, DL));
9483 case Intrinsic::riscv_vmv_v_x:
9484 return lowerScalarSplat(Op.getOperand(1), Op.getOperand(2),
9485 Op.getOperand(3), Op.getSimpleValueType(), DL, DAG,
9486 Subtarget);
9487 case Intrinsic::riscv_vfmv_v_f:
9488 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, Op.getValueType(),
9489 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
9490 case Intrinsic::riscv_vmv_s_x: {
9491 SDValue Scalar = Op.getOperand(2);
9492
9493 if (Scalar.getValueType().bitsLE(XLenVT)) {
9494 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Scalar);
9495 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, Op.getValueType(),
9496 Op.getOperand(1), Scalar, Op.getOperand(3));
9497 }
9498
9499 assert(Scalar.getValueType() == MVT::i64 && "Unexpected scalar VT!");
9500
9501 // This is an i64 value that lives in two scalar registers. We have to
9502 // insert this in a convoluted way. First we build vXi64 splat containing
9503 // the two values that we assemble using some bit math. Next we'll use
9504 // vid.v and vmseq to build a mask with bit 0 set. Then we'll use that mask
9505 // to merge element 0 from our splat into the source vector.
9506 // FIXME: This is probably not the best way to do this, but it is
9507 // consistent with INSERT_VECTOR_ELT lowering so it is a good starting
9508 // point.
9509 // sw lo, (a0)
9510 // sw hi, 4(a0)
9511 // vlse vX, (a0)
9512 //
9513 // vid.v vVid
9514 // vmseq.vx mMask, vVid, 0
9515 // vmerge.vvm vDest, vSrc, vVal, mMask
9516 MVT VT = Op.getSimpleValueType();
9517 SDValue Vec = Op.getOperand(1);
9518 SDValue VL = getVLOperand(Op);
9519
9520 SDValue SplattedVal = splatSplitI64WithVL(DL, VT, SDValue(), Scalar, VL, DAG);
9521 if (Op.getOperand(1).isUndef())
9522 return SplattedVal;
9523 SDValue SplattedIdx =
9524 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
9525 DAG.getConstant(0, DL, MVT::i32), VL);
9526
9527 MVT MaskVT = getMaskTypeFor(VT);
9528 SDValue Mask = getAllOnesMask(VT, VL, DL, DAG);
9529 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
9530 SDValue SelectCond =
9531 DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,
9532 {VID, SplattedIdx, DAG.getCondCode(ISD::SETEQ),
9533 DAG.getUNDEF(MaskVT), Mask, VL});
9534 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, SelectCond, SplattedVal,
9535 Vec, DAG.getUNDEF(VT), VL);
9536 }
9537 case Intrinsic::riscv_vfmv_s_f:
9538 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, Op.getSimpleValueType(),
9539 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
9540 // EGS * EEW >= 128 bits
9541 case Intrinsic::riscv_vaesdf_vv:
9542 case Intrinsic::riscv_vaesdf_vs:
9543 case Intrinsic::riscv_vaesdm_vv:
9544 case Intrinsic::riscv_vaesdm_vs:
9545 case Intrinsic::riscv_vaesef_vv:
9546 case Intrinsic::riscv_vaesef_vs:
9547 case Intrinsic::riscv_vaesem_vv:
9548 case Intrinsic::riscv_vaesem_vs:
9549 case Intrinsic::riscv_vaeskf1:
9550 case Intrinsic::riscv_vaeskf2:
9551 case Intrinsic::riscv_vaesz_vs:
9552 case Intrinsic::riscv_vsm4k:
9553 case Intrinsic::riscv_vsm4r_vv:
9554 case Intrinsic::riscv_vsm4r_vs: {
9555 if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
9556 !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
9557 !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
9558 report_fatal_error("EGW should be greater than or equal to 4 * SEW.");
9559 return Op;
9560 }
9561 // EGS * EEW >= 256 bits
9562 case Intrinsic::riscv_vsm3c:
9563 case Intrinsic::riscv_vsm3me: {
9564 if (!isValidEGW(8, Op.getSimpleValueType(), Subtarget) ||
9565 !isValidEGW(8, Op->getOperand(1).getSimpleValueType(), Subtarget))
9566 report_fatal_error("EGW should be greater than or equal to 8 * SEW.");
9567 return Op;
9568 }
9569 // zvknha(SEW=32)/zvknhb(SEW=[32|64])
9570 case Intrinsic::riscv_vsha2ch:
9571 case Intrinsic::riscv_vsha2cl:
9572 case Intrinsic::riscv_vsha2ms: {
9573 if (Op->getSimpleValueType(0).getScalarSizeInBits() == 64 &&
9574 !Subtarget.hasStdExtZvknhb())
9575 report_fatal_error("SEW=64 needs Zvknhb to be enabled.");
9576 if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
9577 !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
9578 !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
9579 report_fatal_error("EGW should be greater than or equal to 4 * SEW.");
9580 return Op;
9581 }
9582 case Intrinsic::riscv_sf_vc_v_x:
9583 case Intrinsic::riscv_sf_vc_v_i:
9584 case Intrinsic::riscv_sf_vc_v_xv:
9585 case Intrinsic::riscv_sf_vc_v_iv:
9586 case Intrinsic::riscv_sf_vc_v_vv:
9587 case Intrinsic::riscv_sf_vc_v_fv:
9588 case Intrinsic::riscv_sf_vc_v_xvv:
9589 case Intrinsic::riscv_sf_vc_v_ivv:
9590 case Intrinsic::riscv_sf_vc_v_vvv:
9591 case Intrinsic::riscv_sf_vc_v_fvv:
9592 case Intrinsic::riscv_sf_vc_v_xvw:
9593 case Intrinsic::riscv_sf_vc_v_ivw:
9594 case Intrinsic::riscv_sf_vc_v_vvw:
9595 case Intrinsic::riscv_sf_vc_v_fvw: {
9596 MVT VT = Op.getSimpleValueType();
9597
9598 SmallVector<SDValue> Operands{Op->op_values()};
9600
9601 MVT RetVT = VT;
9602 if (VT.isFixedLengthVector())
9604 else if (VT.isFloatingPoint())
9607
9608 SDValue NewNode = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, RetVT, Operands);
9609
9610 if (VT.isFixedLengthVector())
9611 NewNode = convertFromScalableVector(VT, NewNode, DAG, Subtarget);
9612 else if (VT.isFloatingPoint())
9613 NewNode = DAG.getBitcast(VT, NewNode);
9614
9615 if (Op == NewNode)
9616 break;
9617
9618 return NewNode;
9619 }
9620 }
9621
9622 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
9623}
9624
9626 unsigned Type) {
9627 SDLoc DL(Op);
9628 SmallVector<SDValue> Operands{Op->op_values()};
9629 Operands.erase(Operands.begin() + 1);
9630
9631 const RISCVSubtarget &Subtarget =
9633 MVT VT = Op.getSimpleValueType();
9634 MVT RetVT = VT;
9635 MVT FloatVT = VT;
9636
9637 if (VT.isFloatingPoint()) {
9638 RetVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()),
9640 FloatVT = RetVT;
9641 }
9642 if (VT.isFixedLengthVector())
9644 Subtarget);
9645
9647
9648 SDVTList VTs = DAG.getVTList({RetVT, MVT::Other});
9649 SDValue NewNode = DAG.getNode(Type, DL, VTs, Operands);
9650 SDValue Chain = NewNode.getValue(1);
9651
9652 if (VT.isFixedLengthVector())
9653 NewNode = convertFromScalableVector(FloatVT, NewNode, DAG, Subtarget);
9654 if (VT.isFloatingPoint())
9655 NewNode = DAG.getBitcast(VT, NewNode);
9656
9657 NewNode = DAG.getMergeValues({NewNode, Chain}, DL);
9658
9659 return NewNode;
9660}
9661
9663 unsigned Type) {
9664 SmallVector<SDValue> Operands{Op->op_values()};
9665 Operands.erase(Operands.begin() + 1);
9667
9668 return DAG.getNode(Type, SDLoc(Op), Op.getValueType(), Operands);
9669}
9670
9671SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
9672 SelectionDAG &DAG) const {
9673 unsigned IntNo = Op.getConstantOperandVal(1);
9674 switch (IntNo) {
9675 default:
9676 break;
9677 case Intrinsic::riscv_seg2_load:
9678 case Intrinsic::riscv_seg3_load:
9679 case Intrinsic::riscv_seg4_load:
9680 case Intrinsic::riscv_seg5_load:
9681 case Intrinsic::riscv_seg6_load:
9682 case Intrinsic::riscv_seg7_load:
9683 case Intrinsic::riscv_seg8_load: {
9684 SDLoc DL(Op);
9685 static const Intrinsic::ID VlsegInts[7] = {
9686 Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3,
9687 Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5,
9688 Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7,
9689 Intrinsic::riscv_vlseg8};
9690 unsigned NF = Op->getNumValues() - 1;
9691 assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
9692 MVT XLenVT = Subtarget.getXLenVT();
9693 MVT VT = Op->getSimpleValueType(0);
9694 MVT ContainerVT = getContainerForFixedLengthVector(VT);
9695 unsigned Sz = NF * ContainerVT.getVectorMinNumElements() *
9696 ContainerVT.getScalarSizeInBits();
9697 EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, NF);
9698
9699 SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
9700 SDValue IntID = DAG.getTargetConstant(VlsegInts[NF - 2], DL, XLenVT);
9702
9703 SDVTList VTs = DAG.getVTList({VecTupTy, MVT::Other});
9704 SDValue Ops[] = {
9705 Load->getChain(),
9706 IntID,
9707 DAG.getUNDEF(VecTupTy),
9708 Op.getOperand(2),
9709 VL,
9710 DAG.getTargetConstant(Log2_64(VT.getScalarSizeInBits()), DL, XLenVT)};
9711 SDValue Result =
9713 Load->getMemoryVT(), Load->getMemOperand());
9715 for (unsigned int RetIdx = 0; RetIdx < NF; RetIdx++) {
9716 SDValue SubVec =
9717 DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL, ContainerVT,
9718 Result.getValue(0), DAG.getVectorIdxConstant(RetIdx, DL));
9719 Results.push_back(convertFromScalableVector(VT, SubVec, DAG, Subtarget));
9720 }
9721 Results.push_back(Result.getValue(1));
9722 return DAG.getMergeValues(Results, DL);
9723 }
9724 case Intrinsic::riscv_sf_vc_v_x_se:
9726 case Intrinsic::riscv_sf_vc_v_i_se:
9728 case Intrinsic::riscv_sf_vc_v_xv_se:
9730 case Intrinsic::riscv_sf_vc_v_iv_se:
9732 case Intrinsic::riscv_sf_vc_v_vv_se:
9734 case Intrinsic::riscv_sf_vc_v_fv_se:
9736 case Intrinsic::riscv_sf_vc_v_xvv_se:
9738 case Intrinsic::riscv_sf_vc_v_ivv_se:
9740 case Intrinsic::riscv_sf_vc_v_vvv_se:
9742 case Intrinsic::riscv_sf_vc_v_fvv_se:
9744 case Intrinsic::riscv_sf_vc_v_xvw_se:
9746 case Intrinsic::riscv_sf_vc_v_ivw_se:
9748 case Intrinsic::riscv_sf_vc_v_vvw_se:
9750 case Intrinsic::riscv_sf_vc_v_fvw_se:
9752 }
9753
9754 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
9755}
9756
9757SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
9758 SelectionDAG &DAG) const {
9759 unsigned IntNo = Op.getConstantOperandVal(1);
9760 switch (IntNo) {
9761 default:
9762 break;
9763 case Intrinsic::riscv_seg2_store:
9764 case Intrinsic::riscv_seg3_store:
9765 case Intrinsic::riscv_seg4_store:
9766 case Intrinsic::riscv_seg5_store:
9767 case Intrinsic::riscv_seg6_store:
9768 case Intrinsic::riscv_seg7_store:
9769 case Intrinsic::riscv_seg8_store: {
9770 SDLoc DL(Op);
9771 static const Intrinsic::ID VssegInts[] = {
9772 Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3,
9773 Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5,
9774 Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7,
9775 Intrinsic::riscv_vsseg8};
9776 // Operands are (chain, int_id, vec*, ptr, vl)
9777 unsigned NF = Op->getNumOperands() - 4;
9778 assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
9779 MVT XLenVT = Subtarget.getXLenVT();
9780 MVT VT = Op->getOperand(2).getSimpleValueType();
9781 MVT ContainerVT = getContainerForFixedLengthVector(VT);
9782 unsigned Sz = NF * ContainerVT.getVectorMinNumElements() *
9783 ContainerVT.getScalarSizeInBits();
9784 EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, NF);
9785
9786 SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
9787 SDValue IntID = DAG.getTargetConstant(VssegInts[NF - 2], DL, XLenVT);
9788 SDValue Ptr = Op->getOperand(NF + 2);
9789
9790 auto *FixedIntrinsic = cast<MemIntrinsicSDNode>(Op);
9791
9792 SDValue StoredVal = DAG.getUNDEF(VecTupTy);
9793 for (unsigned i = 0; i < NF; i++)
9794 StoredVal = DAG.getNode(
9795 RISCVISD::TUPLE_INSERT, DL, VecTupTy, StoredVal,
9797 ContainerVT, FixedIntrinsic->getOperand(2 + i), DAG, Subtarget),
9798 DAG.getVectorIdxConstant(i, DL));
9799
9800 SDValue Ops[] = {
9801 FixedIntrinsic->getChain(),
9802 IntID,
9803 StoredVal,
9804 Ptr,
9805 VL,
9806 DAG.getTargetConstant(Log2_64(VT.getScalarSizeInBits()), DL, XLenVT)};
9807
9808 return DAG.getMemIntrinsicNode(
9809 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Ops,
9810 FixedIntrinsic->getMemoryVT(), FixedIntrinsic->getMemOperand());
9811 }
9812 case Intrinsic::riscv_sf_vc_xv_se:
9814 case Intrinsic::riscv_sf_vc_iv_se:
9816 case Intrinsic::riscv_sf_vc_vv_se:
9818 case Intrinsic::riscv_sf_vc_fv_se:
9820 case Intrinsic::riscv_sf_vc_xvv_se:
9822 case Intrinsic::riscv_sf_vc_ivv_se:
9824 case Intrinsic::riscv_sf_vc_vvv_se:
9826 case Intrinsic::riscv_sf_vc_fvv_se:
9828 case Intrinsic::riscv_sf_vc_xvw_se:
9830 case Intrinsic::riscv_sf_vc_ivw_se:
9832 case Intrinsic::riscv_sf_vc_vvw_se:
9834 case Intrinsic::riscv_sf_vc_fvw_se:
9836 }
9837
9838 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
9839}
9840
9841static unsigned getRVVReductionOp(unsigned ISDOpcode) {
9842 switch (ISDOpcode) {
9843 default:
9844 llvm_unreachable("Unhandled reduction");
9845 case ISD::VP_REDUCE_ADD:
9846 case ISD::VECREDUCE_ADD:
9848 case ISD::VP_REDUCE_UMAX:
9849 case ISD::VECREDUCE_UMAX:
9851 case ISD::VP_REDUCE_SMAX:
9852 case ISD::VECREDUCE_SMAX:
9854 case ISD::VP_REDUCE_UMIN:
9855 case ISD::VECREDUCE_UMIN:
9857 case ISD::VP_REDUCE_SMIN:
9858 case ISD::VECREDUCE_SMIN:
9860 case ISD::VP_REDUCE_AND:
9861 case ISD::VECREDUCE_AND:
9863 case ISD::VP_REDUCE_OR:
9864 case ISD::VECREDUCE_OR:
9866 case ISD::VP_REDUCE_XOR:
9867 case ISD::VECREDUCE_XOR:
9869 case ISD::VP_REDUCE_FADD:
9871 case ISD::VP_REDUCE_SEQ_FADD:
9873 case ISD::VP_REDUCE_FMAX:
9874 case ISD::VP_REDUCE_FMAXIMUM:
9876 case ISD::VP_REDUCE_FMIN:
9877 case ISD::VP_REDUCE_FMINIMUM:
9879 }
9880
9881}
9882
9883SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op,
9884 SelectionDAG &DAG,
9885 bool IsVP) const {
9886 SDLoc DL(Op);
9887 SDValue Vec = Op.getOperand(IsVP ? 1 : 0);
9888 MVT VecVT = Vec.getSimpleValueType();
9889 assert((Op.getOpcode() == ISD::VECREDUCE_AND ||
9890 Op.getOpcode() == ISD::VECREDUCE_OR ||
9891 Op.getOpcode() == ISD::VECREDUCE_XOR ||
9892 Op.getOpcode() == ISD::VP_REDUCE_AND ||
9893 Op.getOpcode() == ISD::VP_REDUCE_OR ||
9894 Op.getOpcode() == ISD::VP_REDUCE_XOR) &&
9895 "Unexpected reduction lowering");
9896
9897 MVT XLenVT = Subtarget.getXLenVT();
9898
9899 MVT ContainerVT = VecVT;
9900 if (VecVT.isFixedLengthVector()) {
9901 ContainerVT = getContainerForFixedLengthVector(VecVT);
9902 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9903 }
9904
9905 SDValue Mask, VL;
9906 if (IsVP) {
9907 Mask = Op.getOperand(2);
9908 VL = Op.getOperand(3);
9909 } else {
9910 std::tie(Mask, VL) =
9911 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
9912 }
9913
9915 switch (Op.getOpcode()) {
9916 default:
9917 llvm_unreachable("Unhandled reduction");
9918 case ISD::VECREDUCE_AND:
9919 case ISD::VP_REDUCE_AND: {
9920 // vcpop ~x == 0
9921 SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
9922 Vec = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Vec, TrueMask, VL);
9923 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
9924 CC = ISD::SETEQ;
9925 break;
9926 }
9927 case ISD::VECREDUCE_OR:
9928 case ISD::VP_REDUCE_OR:
9929 // vcpop x != 0
9930 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
9931 CC = ISD::SETNE;
9932 break;
9933 case ISD::VECREDUCE_XOR:
9934 case ISD::VP_REDUCE_XOR: {
9935 // ((vcpop x) & 1) != 0
9936 SDValue One = DAG.getConstant(1, DL, XLenVT);
9937 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
9938 Vec = DAG.getNode(ISD::AND, DL, XLenVT, Vec, One);
9939 CC = ISD::SETNE;
9940 break;
9941 }
9942 }
9943
9944 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
9945 SDValue SetCC = DAG.getSetCC(DL, XLenVT, Vec, Zero, CC);
9946 SetCC = DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), SetCC);
9947
9948 if (!IsVP)
9949 return SetCC;
9950
9951 // Now include the start value in the operation.
9952 // Note that we must return the start value when no elements are operated
9953 // upon. The vcpop instructions we've emitted in each case above will return
9954 // 0 for an inactive vector, and so we've already received the neutral value:
9955 // AND gives us (0 == 0) -> 1 and OR/XOR give us (0 != 0) -> 0. Therefore we
9956 // can simply include the start value.
9957 unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());
9958 return DAG.getNode(BaseOpc, DL, Op.getValueType(), SetCC, Op.getOperand(0));
9959}
9960
9961static bool isNonZeroAVL(SDValue AVL) {
9962 auto *RegisterAVL = dyn_cast<RegisterSDNode>(AVL);
9963 auto *ImmAVL = dyn_cast<ConstantSDNode>(AVL);
9964 return (RegisterAVL && RegisterAVL->getReg() == RISCV::X0) ||
9965 (ImmAVL && ImmAVL->getZExtValue() >= 1);
9966}
9967
9968/// Helper to lower a reduction sequence of the form:
9969/// scalar = reduce_op vec, scalar_start
9970static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT,
9971 SDValue StartValue, SDValue Vec, SDValue Mask,
9972 SDValue VL, const SDLoc &DL, SelectionDAG &DAG,
9973 const RISCVSubtarget &Subtarget) {
9974 const MVT VecVT = Vec.getSimpleValueType();
9975 const MVT M1VT = getLMUL1VT(VecVT);
9976 const MVT XLenVT = Subtarget.getXLenVT();
9977 const bool NonZeroAVL = isNonZeroAVL(VL);
9978
9979 // The reduction needs an LMUL1 input; do the splat at either LMUL1
9980 // or the original VT if fractional.
9981 auto InnerVT = VecVT.bitsLE(M1VT) ? VecVT : M1VT;
9982 // We reuse the VL of the reduction to reduce vsetvli toggles if we can
9983 // prove it is non-zero. For the AVL=0 case, we need the scalar to
9984 // be the result of the reduction operation.
9985 auto InnerVL = NonZeroAVL ? VL : DAG.getConstant(1, DL, XLenVT);
9986 SDValue InitialValue = lowerScalarInsert(StartValue, InnerVL, InnerVT, DL,
9987 DAG, Subtarget);
9988 if (M1VT != InnerVT)
9989 InitialValue =
9990 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, M1VT, DAG.getUNDEF(M1VT),
9991 InitialValue, DAG.getVectorIdxConstant(0, DL));
9992 SDValue PassThru = NonZeroAVL ? DAG.getUNDEF(M1VT) : InitialValue;
9993 SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
9994 SDValue Ops[] = {PassThru, Vec, InitialValue, Mask, VL, Policy};
9995 SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, Ops);
9996 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Reduction,
9997 DAG.getVectorIdxConstant(0, DL));
9998}
9999
10000SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op,
10001 SelectionDAG &DAG) const {
10002 SDLoc DL(Op);
10003 SDValue Vec = Op.getOperand(0);
10004 EVT VecEVT = Vec.getValueType();
10005
10006 unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());
10007
10008 // Due to ordering in legalize types we may have a vector type that needs to
10009 // be split. Do that manually so we can get down to a legal type.
10010 while (getTypeAction(*DAG.getContext(), VecEVT) ==
10012 auto [Lo, Hi] = DAG.SplitVector(Vec, DL);
10013 VecEVT = Lo.getValueType();
10014 Vec = DAG.getNode(BaseOpc, DL, VecEVT, Lo, Hi);
10015 }
10016
10017 // TODO: The type may need to be widened rather than split. Or widened before
10018 // it can be split.
10019 if (!isTypeLegal(VecEVT))
10020 return SDValue();
10021
10022 MVT VecVT = VecEVT.getSimpleVT();
10023 MVT VecEltVT = VecVT.getVectorElementType();
10024 unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode());
10025
10026 MVT ContainerVT = VecVT;
10027 if (VecVT.isFixedLengthVector()) {
10028 ContainerVT = getContainerForFixedLengthVector(VecVT);
10029 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10030 }
10031
10032 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
10033
10034 SDValue StartV = DAG.getNeutralElement(BaseOpc, DL, VecEltVT, SDNodeFlags());
10035 switch (BaseOpc) {
10036 case ISD::AND:
10037 case ISD::OR:
10038 case ISD::UMAX:
10039 case ISD::UMIN:
10040 case ISD::SMAX:
10041 case ISD::SMIN:
10042 StartV = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Vec,
10043 DAG.getVectorIdxConstant(0, DL));
10044 }
10045 return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), StartV, Vec,
10046 Mask, VL, DL, DAG, Subtarget);
10047}
10048
10049// Given a reduction op, this function returns the matching reduction opcode,
10050// the vector SDValue and the scalar SDValue required to lower this to a
10051// RISCVISD node.
10052static std::tuple<unsigned, SDValue, SDValue>
10054 const RISCVSubtarget &Subtarget) {
10055 SDLoc DL(Op);
10056 auto Flags = Op->getFlags();
10057 unsigned Opcode = Op.getOpcode();
10058 switch (Opcode) {
10059 default:
10060 llvm_unreachable("Unhandled reduction");
10061 case ISD::VECREDUCE_FADD: {
10062 // Use positive zero if we can. It is cheaper to materialize.
10063 SDValue Zero =
10064 DAG.getConstantFP(Flags.hasNoSignedZeros() ? 0.0 : -0.0, DL, EltVT);
10065 return std::make_tuple(RISCVISD::VECREDUCE_FADD_VL, Op.getOperand(0), Zero);
10066 }
10067 case ISD::VECREDUCE_SEQ_FADD:
10068 return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD_VL, Op.getOperand(1),
10069 Op.getOperand(0));
10070 case ISD::VECREDUCE_FMINIMUM:
10071 case ISD::VECREDUCE_FMAXIMUM:
10072 case ISD::VECREDUCE_FMIN:
10073 case ISD::VECREDUCE_FMAX: {
10074 SDValue Front =
10075 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Op.getOperand(0),
10076 DAG.getVectorIdxConstant(0, DL));
10077 unsigned RVVOpc =
10078 (Opcode == ISD::VECREDUCE_FMIN || Opcode == ISD::VECREDUCE_FMINIMUM)
10081 return std::make_tuple(RVVOpc, Op.getOperand(0), Front);
10082 }
10083 }
10084}
10085
// Lower a floating-point VECREDUCE_* node to an RVV reduction sequence. For
// FMINIMUM/FMAXIMUM (which must propagate NaN, unlike the vfredmin/vfredmax
// semantics) an extra NaN check is appended that selects a NaN result when
// any input element is NaN.
SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op,
                                              SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VecEltVT = Op.getSimpleValueType();

  // Pick the RVV reduction opcode plus the vector/scalar-start operands.
  unsigned RVVOpcode;
  SDValue VectorVal, ScalarVal;
  std::tie(RVVOpcode, VectorVal, ScalarVal) =
      getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT, Subtarget);
  MVT VecVT = VectorVal.getSimpleValueType();

  // Fixed-length vectors are operated on inside their scalable container.
  MVT ContainerVT = VecVT;
  if (VecVT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VecVT);
    VectorVal = convertToScalableVector(ContainerVT, VectorVal, DAG, Subtarget);
  }

  MVT ResVT = Op.getSimpleValueType();
  auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
  SDValue Res = lowerReductionSeq(RVVOpcode, ResVT, ScalarVal, VectorVal, Mask,
                                  VL, DL, DAG, Subtarget);
  // Only FMINIMUM/FMAXIMUM need NaN propagation on top of the reduction.
  if (Op.getOpcode() != ISD::VECREDUCE_FMINIMUM &&
      Op.getOpcode() != ISD::VECREDUCE_FMAXIMUM)
    return Res;

  // With nnan the NaN check is dead weight.
  if (Op->getFlags().hasNoNaNs())
    return Res;

  // Force output to NaN if any element is Nan.
  // (x != x) is true exactly on the NaN lanes; count them with vcpop.
  SDValue IsNan =
      DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
                  {VectorVal, VectorVal, DAG.getCondCode(ISD::SETNE),
                   DAG.getUNDEF(Mask.getValueType()), Mask, VL});
  MVT XLenVT = Subtarget.getXLenVT();
  SDValue CPop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, IsNan, Mask, VL);
  SDValue NoNaNs = DAG.getSetCC(DL, XLenVT, CPop,
                                DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
  return DAG.getSelect(
      DL, ResVT, NoNaNs, Res,
      DAG.getConstantFP(APFloat::getNaN(ResVT.getFltSemantics()), DL, ResVT));
}
10127
10128SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op,
10129 SelectionDAG &DAG) const {
10130 SDLoc DL(Op);
10131 unsigned Opc = Op.getOpcode();
10132 SDValue Start = Op.getOperand(0);
10133 SDValue Vec = Op.getOperand(1);
10134 EVT VecEVT = Vec.getValueType();
10135 MVT XLenVT = Subtarget.getXLenVT();
10136
10137 // TODO: The type may need to be widened rather than split. Or widened before
10138 // it can be split.
10139 if (!isTypeLegal(VecEVT))
10140 return SDValue();
10141
10142 MVT VecVT = VecEVT.getSimpleVT();
10143 unsigned RVVOpcode = getRVVReductionOp(Opc);
10144
10145 if (VecVT.isFixedLengthVector()) {
10146 auto ContainerVT = getContainerForFixedLengthVector(VecVT);
10147 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10148 }
10149
10150 SDValue VL = Op.getOperand(3);
10151 SDValue Mask = Op.getOperand(2);
10152 SDValue Res =
10153 lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), Op.getOperand(0),
10154 Vec, Mask, VL, DL, DAG, Subtarget);
10155 if ((Opc != ISD::VP_REDUCE_FMINIMUM && Opc != ISD::VP_REDUCE_FMAXIMUM) ||
10156 Op->getFlags().hasNoNaNs())
10157 return Res;
10158
10159 // Propagate NaNs.
10160 MVT PredVT = getMaskTypeFor(Vec.getSimpleValueType());
10161 // Check if any of the elements in Vec is NaN.
10162 SDValue IsNaN = DAG.getNode(
10163 RISCVISD::SETCC_VL, DL, PredVT,
10164 {Vec, Vec, DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(PredVT), Mask, VL});
10165 SDValue VCPop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, IsNaN, Mask, VL);
10166 // Check if the start value is NaN.
10167 SDValue StartIsNaN = DAG.getSetCC(DL, XLenVT, Start, Start, ISD::SETUO);
10168 VCPop = DAG.getNode(ISD::OR, DL, XLenVT, VCPop, StartIsNaN);
10169 SDValue NoNaNs = DAG.getSetCC(DL, XLenVT, VCPop,
10170 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
10171 MVT ResVT = Res.getSimpleValueType();
10172 return DAG.getSelect(
10173 DL, ResVT, NoNaNs, Res,
10174 DAG.getConstantFP(APFloat::getNaN(ResVT.getFltSemantics()), DL, ResVT));
10175}
10176
10177SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
10178 SelectionDAG &DAG) const {
10179 SDValue Vec = Op.getOperand(0);
10180 SDValue SubVec = Op.getOperand(1);
10181 MVT VecVT = Vec.getSimpleValueType();
10182 MVT SubVecVT = SubVec.getSimpleValueType();
10183
10184 SDLoc DL(Op);
10185 MVT XLenVT = Subtarget.getXLenVT();
10186 unsigned OrigIdx = Op.getConstantOperandVal(2);
10187 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
10188
10189 if (OrigIdx == 0 && Vec.isUndef())
10190 return Op;
10191
10192 // We don't have the ability to slide mask vectors up indexed by their i1
10193 // elements; the smallest we can do is i8. Often we are able to bitcast to
10194 // equivalent i8 vectors. Note that when inserting a fixed-length vector
10195 // into a scalable one, we might not necessarily have enough scalable
10196 // elements to safely divide by 8: nxv1i1 = insert nxv1i1, v4i1 is valid.
10197 if (SubVecVT.getVectorElementType() == MVT::i1) {
10198 if (VecVT.getVectorMinNumElements() >= 8 &&
10199 SubVecVT.getVectorMinNumElements() >= 8) {
10200 assert(OrigIdx % 8 == 0 && "Invalid index");
10201 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
10202 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
10203 "Unexpected mask vector lowering");
10204 OrigIdx /= 8;
10205 SubVecVT =
10206 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
10207 SubVecVT.isScalableVector());
10208 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
10209 VecVT.isScalableVector());
10210 Vec = DAG.getBitcast(VecVT, Vec);
10211 SubVec = DAG.getBitcast(SubVecVT, SubVec);
10212 } else {
10213 // We can't slide this mask vector up indexed by its i1 elements.
10214 // This poses a problem when we wish to insert a scalable vector which
10215 // can't be re-expressed as a larger type. Just choose the slow path and
10216 // extend to a larger type, then truncate back down.
10217 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
10218 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
10219 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
10220 SubVec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtSubVecVT, SubVec);
10221 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ExtVecVT, Vec, SubVec,
10222 Op.getOperand(2));
10223 SDValue SplatZero = DAG.getConstant(0, DL, ExtVecVT);
10224 return DAG.getSetCC(DL, VecVT, Vec, SplatZero, ISD::SETNE);
10225 }
10226 }
10227
10228 // If the subvector vector is a fixed-length type and we don't know VLEN
10229 // exactly, we cannot use subregister manipulation to simplify the codegen; we
10230 // don't know which register of a LMUL group contains the specific subvector
10231 // as we only know the minimum register size. Therefore we must slide the
10232 // vector group up the full amount.
10233 const auto VLen = Subtarget.getRealVLen();
10234 if (SubVecVT.isFixedLengthVector() && !VLen) {
10235 MVT ContainerVT = VecVT;
10236 if (VecVT.isFixedLengthVector()) {
10237 ContainerVT = getContainerForFixedLengthVector(VecVT);
10238 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10239 }
10240
10241 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
10242 DAG.getUNDEF(ContainerVT), SubVec,
10243 DAG.getVectorIdxConstant(0, DL));
10244
10245 SDValue Mask =
10246 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
10247 // Set the vector length to only the number of elements we care about. Note
10248 // that for slideup this includes the offset.
10249 unsigned EndIndex = OrigIdx + SubVecVT.getVectorNumElements();
10250 SDValue VL = DAG.getConstant(EndIndex, DL, XLenVT);
10251
10252 // Use tail agnostic policy if we're inserting over Vec's tail.
10254 if (VecVT.isFixedLengthVector() && EndIndex == VecVT.getVectorNumElements())
10255 Policy = RISCVII::TAIL_AGNOSTIC;
10256
10257 // If we're inserting into the lowest elements, use a tail undisturbed
10258 // vmv.v.v.
10259 if (OrigIdx == 0) {
10260 SubVec =
10261 DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, Vec, SubVec, VL);
10262 } else {
10263 SDValue SlideupAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
10264 SubVec = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, SubVec,
10265 SlideupAmt, Mask, VL, Policy);
10266 }
10267
10268 if (VecVT.isFixedLengthVector())
10269 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
10270 return DAG.getBitcast(Op.getValueType(), SubVec);
10271 }
10272
10273 MVT ContainerVecVT = VecVT;
10274 if (VecVT.isFixedLengthVector()) {
10275 ContainerVecVT = getContainerForFixedLengthVector(VecVT);
10276 Vec = convertToScalableVector(ContainerVecVT, Vec, DAG, Subtarget);
10277 }
10278
10279 MVT ContainerSubVecVT = SubVecVT;
10280 if (SubVecVT.isFixedLengthVector()) {
10281 ContainerSubVecVT = getContainerForFixedLengthVector(SubVecVT);
10282 SubVec = convertToScalableVector(ContainerSubVecVT, SubVec, DAG, Subtarget);
10283 }
10284
10285 unsigned SubRegIdx;
10286 ElementCount RemIdx;
10287 // insert_subvector scales the index by vscale if the subvector is scalable,
10288 // and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if
10289 // we have a fixed length subvector, we need to adjust the index by 1/vscale.
10290 if (SubVecVT.isFixedLengthVector()) {
10291 assert(VLen);
10292 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
10293 auto Decompose =
10295 ContainerVecVT, ContainerSubVecVT, OrigIdx / Vscale, TRI);
10296 SubRegIdx = Decompose.first;
10297 RemIdx = ElementCount::getFixed((Decompose.second * Vscale) +
10298 (OrigIdx % Vscale));
10299 } else {
10300 auto Decompose =
10302 ContainerVecVT, ContainerSubVecVT, OrigIdx, TRI);
10303 SubRegIdx = Decompose.first;
10304 RemIdx = ElementCount::getScalable(Decompose.second);
10305 }
10306
10309 Subtarget.expandVScale(SubVecVT.getSizeInBits()).getKnownMinValue()));
10310 bool ExactlyVecRegSized =
10311 Subtarget.expandVScale(SubVecVT.getSizeInBits())
10312 .isKnownMultipleOf(Subtarget.expandVScale(VecRegSize));
10313
10314 // 1. If the Idx has been completely eliminated and this subvector's size is
10315 // a vector register or a multiple thereof, or the surrounding elements are
10316 // undef, then this is a subvector insert which naturally aligns to a vector
10317 // register. These can easily be handled using subregister manipulation.
10318 // 2. If the subvector isn't an exact multiple of a valid register group size,
10319 // then the insertion must preserve the undisturbed elements of the register.
10320 // We do this by lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1
10321 // vector type (which resolves to a subregister copy), performing a VSLIDEUP
10322 // to place the subvector within the vector register, and an INSERT_SUBVECTOR
10323 // of that LMUL=1 type back into the larger vector (resolving to another
10324 // subregister operation). See below for how our VSLIDEUP works. We go via a
10325 // LMUL=1 type to avoid allocating a large register group to hold our
10326 // subvector.
10327 if (RemIdx.isZero() && (ExactlyVecRegSized || Vec.isUndef())) {
10328 if (SubVecVT.isFixedLengthVector()) {
10329 // We may get NoSubRegister if inserting at index 0 and the subvec
10330 // container is the same as the vector, e.g. vec=v4i32,subvec=v4i32,idx=0
10331 if (SubRegIdx == RISCV::NoSubRegister) {
10332 assert(OrigIdx == 0);
10333 return Op;
10334 }
10335
10336 // Use a insert_subvector that will resolve to an insert subreg.
10337 assert(VLen);
10338 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
10339 SDValue Insert =
10340 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVecVT, Vec, SubVec,
10341 DAG.getConstant(OrigIdx / Vscale, DL, XLenVT));
10342 if (VecVT.isFixedLengthVector())
10343 Insert = convertFromScalableVector(VecVT, Insert, DAG, Subtarget);
10344 return Insert;
10345 }
10346 return Op;
10347 }
10348
10349 // VSLIDEUP works by leaving elements 0<i<OFFSET undisturbed, elements
10350 // OFFSET<=i<VL set to the "subvector" and vl<=i<VLMAX set to the tail policy
10351 // (in our case undisturbed). This means we can set up a subvector insertion
10352 // where OFFSET is the insertion offset, and the VL is the OFFSET plus the
10353 // size of the subvector.
10354 MVT InterSubVT = ContainerVecVT;
10355 SDValue AlignedExtract = Vec;
10356 unsigned AlignedIdx = OrigIdx - RemIdx.getKnownMinValue();
10357 if (SubVecVT.isFixedLengthVector()) {
10358 assert(VLen);
10359 AlignedIdx /= *VLen / RISCV::RVVBitsPerBlock;
10360 }
10361 if (ContainerVecVT.bitsGT(getLMUL1VT(ContainerVecVT))) {
10362 InterSubVT = getLMUL1VT(ContainerVecVT);
10363 // Extract a subvector equal to the nearest full vector register type. This
10364 // should resolve to a EXTRACT_SUBREG instruction.
10365 AlignedExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
10366 DAG.getVectorIdxConstant(AlignedIdx, DL));
10367 }
10368
10369 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InterSubVT,
10370 DAG.getUNDEF(InterSubVT), SubVec,
10371 DAG.getVectorIdxConstant(0, DL));
10372
10373 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVecVT, DL, DAG, Subtarget);
10374
10375 ElementCount EndIndex = RemIdx + SubVecVT.getVectorElementCount();
10376 VL = DAG.getElementCount(DL, XLenVT, SubVecVT.getVectorElementCount());
10377
10378 // Use tail agnostic policy if we're inserting over InterSubVT's tail.
10380 if (Subtarget.expandVScale(EndIndex) ==
10381 Subtarget.expandVScale(InterSubVT.getVectorElementCount()))
10382 Policy = RISCVII::TAIL_AGNOSTIC;
10383
10384 // If we're inserting into the lowest elements, use a tail undisturbed
10385 // vmv.v.v.
10386 if (RemIdx.isZero()) {
10387 SubVec = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, InterSubVT, AlignedExtract,
10388 SubVec, VL);
10389 } else {
10390 SDValue SlideupAmt = DAG.getElementCount(DL, XLenVT, RemIdx);
10391
10392 // Construct the vector length corresponding to RemIdx + length(SubVecVT).
10393 VL = DAG.getNode(ISD::ADD, DL, XLenVT, SlideupAmt, VL);
10394
10395 SubVec = getVSlideup(DAG, Subtarget, DL, InterSubVT, AlignedExtract, SubVec,
10396 SlideupAmt, Mask, VL, Policy);
10397 }
10398
10399 // If required, insert this subvector back into the correct vector register.
10400 // This should resolve to an INSERT_SUBREG instruction.
10401 if (ContainerVecVT.bitsGT(InterSubVT))
10402 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVecVT, Vec, SubVec,
10403 DAG.getVectorIdxConstant(AlignedIdx, DL));
10404
10405 if (VecVT.isFixedLengthVector())
10406 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
10407
10408 // We might have bitcast from a mask type: cast back to the original type if
10409 // required.
10410 return DAG.getBitcast(Op.getSimpleValueType(), SubVec);
10411}
10412
10413SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op,
10414 SelectionDAG &DAG) const {
10415 SDValue Vec = Op.getOperand(0);
10416 MVT SubVecVT = Op.getSimpleValueType();
10417 MVT VecVT = Vec.getSimpleValueType();
10418
10419 SDLoc DL(Op);
10420 MVT XLenVT = Subtarget.getXLenVT();
10421 unsigned OrigIdx = Op.getConstantOperandVal(1);
10422 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
10423
10424 // With an index of 0 this is a cast-like subvector, which can be performed
10425 // with subregister operations.
10426 if (OrigIdx == 0)
10427 return Op;
10428
10429 // We don't have the ability to slide mask vectors down indexed by their i1
10430 // elements; the smallest we can do is i8. Often we are able to bitcast to
10431 // equivalent i8 vectors. Note that when extracting a fixed-length vector
10432 // from a scalable one, we might not necessarily have enough scalable
10433 // elements to safely divide by 8: v8i1 = extract nxv1i1 is valid.
10434 if (SubVecVT.getVectorElementType() == MVT::i1) {
10435 if (VecVT.getVectorMinNumElements() >= 8 &&
10436 SubVecVT.getVectorMinNumElements() >= 8) {
10437 assert(OrigIdx % 8 == 0 && "Invalid index");
10438 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
10439 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
10440 "Unexpected mask vector lowering");
10441 OrigIdx /= 8;
10442 SubVecVT =
10443 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
10444 SubVecVT.isScalableVector());
10445 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
10446 VecVT.isScalableVector());
10447 Vec = DAG.getBitcast(VecVT, Vec);
10448 } else {
10449 // We can't slide this mask vector down, indexed by its i1 elements.
10450 // This poses a problem when we wish to extract a scalable vector which
10451 // can't be re-expressed as a larger type. Just choose the slow path and
10452 // extend to a larger type, then truncate back down.
10453 // TODO: We could probably improve this when extracting certain fixed
10454 // from fixed, where we can extract as i8 and shift the correct element
10455 // right to reach the desired subvector?
10456 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
10457 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
10458 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
10459 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtSubVecVT, Vec,
10460 Op.getOperand(1));
10461 SDValue SplatZero = DAG.getConstant(0, DL, ExtSubVecVT);
10462 return DAG.getSetCC(DL, SubVecVT, Vec, SplatZero, ISD::SETNE);
10463 }
10464 }
10465
10466 const auto VLen = Subtarget.getRealVLen();
10467
10468 // If the subvector vector is a fixed-length type and we don't know VLEN
10469 // exactly, we cannot use subregister manipulation to simplify the codegen; we
10470 // don't know which register of a LMUL group contains the specific subvector
10471 // as we only know the minimum register size. Therefore we must slide the
10472 // vector group down the full amount.
10473 if (SubVecVT.isFixedLengthVector() && !VLen) {
10474 MVT ContainerVT = VecVT;
10475 if (VecVT.isFixedLengthVector()) {
10476 ContainerVT = getContainerForFixedLengthVector(VecVT);
10477 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10478 }
10479
10480 // Shrink down Vec so we're performing the slidedown on a smaller LMUL.
10481 unsigned LastIdx = OrigIdx + SubVecVT.getVectorNumElements() - 1;
10482 if (auto ShrunkVT =
10483 getSmallestVTForIndex(ContainerVT, LastIdx, DL, DAG, Subtarget)) {
10484 ContainerVT = *ShrunkVT;
10485 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
10486 DAG.getVectorIdxConstant(0, DL));
10487 }
10488
10489 SDValue Mask =
10490 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
10491 // Set the vector length to only the number of elements we care about. This
10492 // avoids sliding down elements we're going to discard straight away.
10493 SDValue VL = DAG.getConstant(SubVecVT.getVectorNumElements(), DL, XLenVT);
10494 SDValue SlidedownAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
10495 SDValue Slidedown =
10496 getVSlidedown(DAG, Subtarget, DL, ContainerVT,
10497 DAG.getUNDEF(ContainerVT), Vec, SlidedownAmt, Mask, VL);
10498 // Now we can use a cast-like subvector extract to get the result.
10499 Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
10500 DAG.getVectorIdxConstant(0, DL));
10501 return DAG.getBitcast(Op.getValueType(), Slidedown);
10502 }
10503
10504 if (VecVT.isFixedLengthVector()) {
10505 VecVT = getContainerForFixedLengthVector(VecVT);
10506 Vec = convertToScalableVector(VecVT, Vec, DAG, Subtarget);
10507 }
10508
10509 MVT ContainerSubVecVT = SubVecVT;
10510 if (SubVecVT.isFixedLengthVector())
10511 ContainerSubVecVT = getContainerForFixedLengthVector(SubVecVT);
10512
10513 unsigned SubRegIdx;
10514 ElementCount RemIdx;
10515 // extract_subvector scales the index by vscale if the subvector is scalable,
10516 // and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if
10517 // we have a fixed length subvector, we need to adjust the index by 1/vscale.
10518 if (SubVecVT.isFixedLengthVector()) {
10519 assert(VLen);
10520 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
10521 auto Decompose =
10523 VecVT, ContainerSubVecVT, OrigIdx / Vscale, TRI);
10524 SubRegIdx = Decompose.first;
10525 RemIdx = ElementCount::getFixed((Decompose.second * Vscale) +
10526 (OrigIdx % Vscale));
10527 } else {
10528 auto Decompose =
10530 VecVT, ContainerSubVecVT, OrigIdx, TRI);
10531 SubRegIdx = Decompose.first;
10532 RemIdx = ElementCount::getScalable(Decompose.second);
10533 }
10534
10535 // If the Idx has been completely eliminated then this is a subvector extract
10536 // which naturally aligns to a vector register. These can easily be handled
10537 // using subregister manipulation. We use an extract_subvector that will
10538 // resolve to an extract subreg.
10539 if (RemIdx.isZero()) {
10540 if (SubVecVT.isFixedLengthVector()) {
10541 assert(VLen);
10542 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
10543 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerSubVecVT, Vec,
10544 DAG.getConstant(OrigIdx / Vscale, DL, XLenVT));
10545 return convertFromScalableVector(SubVecVT, Vec, DAG, Subtarget);
10546 }
10547 return Op;
10548 }
10549
10550 // Else SubVecVT is M1 or smaller and may need to be slid down: if SubVecVT
10551 // was > M1 then the index would need to be a multiple of VLMAX, and so would
10552 // divide exactly.
10553 assert(RISCVVType::decodeVLMUL(getLMUL(ContainerSubVecVT)).second ||
10554 getLMUL(ContainerSubVecVT) == RISCVII::VLMUL::LMUL_1);
10555
10556 // If the vector type is an LMUL-group type, extract a subvector equal to the
10557 // nearest full vector register type.
10558 MVT InterSubVT = VecVT;
10559 if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
10560 // If VecVT has an LMUL > 1, then SubVecVT should have a smaller LMUL, and
10561 // we should have successfully decomposed the extract into a subregister.
10562 // We use an extract_subvector that will resolve to a subreg extract.
10563 assert(SubRegIdx != RISCV::NoSubRegister);
10564 (void)SubRegIdx;
10565 unsigned Idx = OrigIdx - RemIdx.getKnownMinValue();
10566 if (SubVecVT.isFixedLengthVector()) {
10567 assert(VLen);
10568 Idx /= *VLen / RISCV::RVVBitsPerBlock;
10569 }
10570 InterSubVT = getLMUL1VT(VecVT);
10571 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
10572 DAG.getConstant(Idx, DL, XLenVT));
10573 }
10574
10575 // Slide this vector register down by the desired number of elements in order
10576 // to place the desired subvector starting at element 0.
10577 SDValue SlidedownAmt = DAG.getElementCount(DL, XLenVT, RemIdx);
10578 auto [Mask, VL] = getDefaultScalableVLOps(InterSubVT, DL, DAG, Subtarget);
10579 if (SubVecVT.isFixedLengthVector())
10580 VL = DAG.getConstant(SubVecVT.getVectorNumElements(), DL, XLenVT);
10581 SDValue Slidedown =
10582 getVSlidedown(DAG, Subtarget, DL, InterSubVT, DAG.getUNDEF(InterSubVT),
10583 Vec, SlidedownAmt, Mask, VL);
10584
10585 // Now the vector is in the right position, extract our final subvector. This
10586 // should resolve to a COPY.
10587 Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
10588 DAG.getVectorIdxConstant(0, DL));
10589
10590 // We might have bitcast from a mask type: cast back to the original type if
10591 // required.
10592 return DAG.getBitcast(Op.getSimpleValueType(), Slidedown);
10593}
10594
10595// Widen a vector's operands to i8, then truncate its results back to the
10596// original type, typically i1. All operand and result types must be the same.
10598 SelectionDAG &DAG) {
10599 MVT VT = N.getSimpleValueType();
// Mask (i1) elements are widened to i8 so the operation can be performed on
// a plain integer vector type.
10600 MVT WideVT = VT.changeVectorElementType(MVT::i8);
// Zero-extend every operand to the widened type; each operand must already
// have the same type as the result.
10602 for (SDValue Op : N->ops()) {
10603 assert(Op.getSimpleValueType() == VT &&
10604 "Operands and result must be same type");
10605 WideOps.push_back(DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Op));
10606 }
10607
10608 unsigned NumVals = N->getNumValues();
10609
10611 NumVals, N.getValueType().changeVectorElementType(MVT::i8)));
// Re-issue the original opcode on the widened operands.
10612 SDValue WideN = DAG.getNode(N.getOpcode(), DL, VTs, WideOps);
10613 SmallVector<SDValue, 4> TruncVals;
// Narrow each result back to the original element type: a compare against
// zero turns any non-zero widened lane into a set mask bit.
10614 for (unsigned I = 0; I < NumVals; I++) {
10615 TruncVals.push_back(
10616 DAG.getSetCC(DL, N->getSimpleValueType(I), WideN.getValue(I),
10617 DAG.getConstant(0, DL, WideVT), ISD::SETNE));
10618 }
10619
// Multi-result nodes are returned as a merged value list.
10620 if (TruncVals.size() > 1)
10621 return DAG.getMergeValues(TruncVals, DL);
10622 return TruncVals.front();
10623}
10624
10625SDValue RISCVTargetLowering::lowerVECTOR_DEINTERLEAVE(SDValue Op,
10626 SelectionDAG &DAG) const {
10627 SDLoc DL(Op);
10628 MVT VecVT = Op.getSimpleValueType();
10629
10630 assert(VecVT.isScalableVector() &&
10631 "vector_interleave on non-scalable vector!");
10632
10633 // 1 bit element vectors need to be widened to e8
10634 if (VecVT.getVectorElementType() == MVT::i1)
10635 return widenVectorOpsToi8(Op, DL, DAG);
10636
10637 // If the VT is LMUL=8, we need to split and reassemble.
// Deinterleaving two LMUL=8 operands directly would require an LMUL=16
// concatenation, which is not representable; recurse on the split halves
// and stitch the partial results back together instead.
10638 if (VecVT.getSizeInBits().getKnownMinValue() ==
10639 (8 * RISCV::RVVBitsPerBlock)) {
10640 auto [Op0Lo, Op0Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
10641 auto [Op1Lo, Op1Hi] = DAG.SplitVectorOperand(Op.getNode(), 1);
10642 EVT SplitVT = Op0Lo.getValueType();
10643
10645 DAG.getVTList(SplitVT, SplitVT), Op0Lo, Op0Hi);
10647 DAG.getVTList(SplitVT, SplitVT), Op1Lo, Op1Hi);
10648
// The even lanes of both halves form result 0, the odd lanes result 1.
10649 SDValue Even = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
10650 ResLo.getValue(0), ResHi.getValue(0));
10651 SDValue Odd = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, ResLo.getValue(1),
10652 ResHi.getValue(1));
10653 return DAG.getMergeValues({Even, Odd}, DL);
10654 }
10655
10656 // Concatenate the two vectors as one vector to deinterleave
10657 MVT ConcatVT =
10660 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT,
10661 Op.getOperand(0), Op.getOperand(1));
10662
10663 // We want to operate on all lanes, so get the mask and VL and mask for it
10664 auto [Mask, VL] = getDefaultScalableVLOps(ConcatVT, DL, DAG, Subtarget);
10665 SDValue Passthru = DAG.getUNDEF(ConcatVT);
10666
10667 // We can deinterleave through vnsrl.wi if the element type is smaller than
10668 // ELEN
10669 if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
10670 SDValue Even =
10671 getDeinterleaveViaVNSRL(DL, VecVT, Concat, true, Subtarget, DAG);
10672 SDValue Odd =
10673 getDeinterleaveViaVNSRL(DL, VecVT, Concat, false, Subtarget, DAG);
10674 return DAG.getMergeValues({Even, Odd}, DL);
10675 }
10676
// Fallback: gather the even and odd lanes explicitly with vrgather.vv.
10677 // For the indices, use the same SEW to avoid an extra vsetvli
10678 MVT IdxVT = ConcatVT.changeVectorElementTypeToInteger();
10679 // Create a vector of even indices {0, 2, 4, ...}
10680 SDValue EvenIdx =
10681 DAG.getStepVector(DL, IdxVT, APInt(IdxVT.getScalarSizeInBits(), 2));
10682 // Create a vector of odd indices {1, 3, 5, ... }
10683 SDValue OddIdx =
10684 DAG.getNode(ISD::ADD, DL, IdxVT, EvenIdx, DAG.getConstant(1, DL, IdxVT));
10685
10686 // Gather the even and odd elements into two separate vectors
10687 SDValue EvenWide = DAG.getNode(RISCVISD::VRGATHER_VV_VL, DL, ConcatVT,
10688 Concat, EvenIdx, Passthru, Mask, VL);
10689 SDValue OddWide = DAG.getNode(RISCVISD::VRGATHER_VV_VL, DL, ConcatVT,
10690 Concat, OddIdx, Passthru, Mask, VL);
10691
10692 // Extract the result half of the gather for even and odd
10693 SDValue Even = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, EvenWide,
10694 DAG.getVectorIdxConstant(0, DL));
10695 SDValue Odd = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, OddWide,
10696 DAG.getVectorIdxConstant(0, DL));
10697
10698 return DAG.getMergeValues({Even, Odd}, DL);
10699}
10700
10701SDValue RISCVTargetLowering::lowerVECTOR_INTERLEAVE(SDValue Op,
10702 SelectionDAG &DAG) const {
10703 SDLoc DL(Op);
10704 MVT VecVT = Op.getSimpleValueType();
10705
10706 assert(VecVT.isScalableVector() &&
10707 "vector_interleave on non-scalable vector!");
10708
10709 // i1 vectors need to be widened to i8
10710 if (VecVT.getVectorElementType() == MVT::i1)
10711 return widenVectorOpsToi8(Op, DL, DAG);
10712
10713 MVT XLenVT = Subtarget.getXLenVT();
// X0 as the VL operand means "use VLMAX" for the vector ops built below.
10714 SDValue VL = DAG.getRegister(RISCV::X0, XLenVT);
10715
10716 // If the VT is LMUL=8, we need to split and reassemble.
// Interleaving two LMUL=8 vectors would need an LMUL=16 intermediate,
// which is not representable; interleave split halves and concatenate.
10717 if (VecVT.getSizeInBits().getKnownMinValue() == (8 * RISCV::RVVBitsPerBlock)) {
10718 auto [Op0Lo, Op0Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
10719 auto [Op1Lo, Op1Hi] = DAG.SplitVectorOperand(Op.getNode(), 1);
10720 EVT SplitVT = Op0Lo.getValueType();
10721
10723 DAG.getVTList(SplitVT, SplitVT), Op0Lo, Op1Lo);
10725 DAG.getVTList(SplitVT, SplitVT), Op0Hi, Op1Hi);
10726
10727 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
10728 ResLo.getValue(0), ResLo.getValue(1));
10729 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
10730 ResHi.getValue(0), ResHi.getValue(1));
10731 return DAG.getMergeValues({Lo, Hi}, DL);
10732 }
10733
10734 SDValue Interleaved;
10735
10736 // If the element type is smaller than ELEN, then we can interleave with
10737 // vwaddu.vv and vwmaccu.vx
10738 if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
10739 Interleaved = getWideningInterleave(Op.getOperand(0), Op.getOperand(1), DL,
10740 DAG, Subtarget);
10741 } else {
10742 // Otherwise, fallback to using vrgathere16.vv
10743 MVT ConcatVT =
10746 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT,
10747 Op.getOperand(0), Op.getOperand(1));
10748
// i16 indices are sufficient per the VLMAX <= 65536 assumption.
10749 MVT IdxVT = ConcatVT.changeVectorElementType(MVT::i16);
10750
10751 // 0 1 2 3 4 5 6 7 ...
10752 SDValue StepVec = DAG.getStepVector(DL, IdxVT);
10753
10754 // 1 1 1 1 1 1 1 1 ...
10755 SDValue Ones = DAG.getSplatVector(IdxVT, DL, DAG.getConstant(1, DL, XLenVT));
10756
10757 // 1 0 1 0 1 0 1 0 ...
10758 SDValue OddMask = DAG.getNode(ISD::AND, DL, IdxVT, StepVec, Ones);
10759 OddMask = DAG.getSetCC(
10760 DL, IdxVT.changeVectorElementType(MVT::i1), OddMask,
10761 DAG.getSplatVector(IdxVT, DL, DAG.getConstant(0, DL, XLenVT)),
10763
10764 SDValue VLMax = DAG.getSplatVector(IdxVT, DL, computeVLMax(VecVT, DL, DAG));
10765
10766 // Build up the index vector for interleaving the concatenated vector
10767 // 0 0 1 1 2 2 3 3 ...
10768 SDValue Idx = DAG.getNode(ISD::SRL, DL, IdxVT, StepVec, Ones);
10769 // 0 n 1 n+1 2 n+2 3 n+3 ...
// Masked add: VLMax is only added to the odd lanes, redirecting them into
// the second half of the concatenated source vector.
10770 Idx =
10771 DAG.getNode(RISCVISD::ADD_VL, DL, IdxVT, Idx, VLMax, Idx, OddMask, VL);
10772
10773 // Then perform the interleave
10774 // v[0] v[n] v[1] v[n+1] v[2] v[n+2] v[3] v[n+3] ...
10775 SDValue TrueMask = getAllOnesMask(IdxVT, VL, DL, DAG);
10776 Interleaved = DAG.getNode(RISCVISD::VRGATHEREI16_VV_VL, DL, ConcatVT,
10777 Concat, Idx, DAG.getUNDEF(ConcatVT), TrueMask, VL);
10778 }
10779
10780 // Extract the two halves from the interleaved result
10781 SDValue Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, Interleaved,
10782 DAG.getVectorIdxConstant(0, DL));
10783 SDValue Hi = DAG.getNode(
10784 ISD::EXTRACT_SUBVECTOR, DL, VecVT, Interleaved,
10786
10787 return DAG.getMergeValues({Lo, Hi}, DL);
10788}
10789
10790// Lower step_vector to the vid instruction. Any non-identity step value must
10791// be accounted for my manual expansion.
10792SDValue RISCVTargetLowering::lowerSTEP_VECTOR(SDValue Op,
10793 SelectionDAG &DAG) const {
10794 SDLoc DL(Op);
10795 MVT VT = Op.getSimpleValueType();
10796 assert(VT.isScalableVector() && "Expected scalable vector");
10797 MVT XLenVT = Subtarget.getXLenVT();
10798 auto [Mask, VL] = getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
10799 SDValue StepVec = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
10800 uint64_t StepValImm = Op.getConstantOperandVal(0);
10801 if (StepValImm != 1) {
10802 if (isPowerOf2_64(StepValImm)) {
10803 SDValue StepVal =
10804 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
10805 DAG.getConstant(Log2_64(StepValImm), DL, XLenVT), VL);
10806 StepVec = DAG.getNode(ISD::SHL, DL, VT, StepVec, StepVal);
10807 } else {
10808 SDValue StepVal = lowerScalarSplat(
10809 SDValue(), DAG.getConstant(StepValImm, DL, VT.getVectorElementType()),
10810 VL, VT, DL, DAG, Subtarget);
10811 StepVec = DAG.getNode(ISD::MUL, DL, VT, StepVec, StepVal);
10812 }
10813 }
10814 return StepVec;
10815}
10816
10817// Implement vector_reverse using vrgather.vv with indices determined by
10818// subtracting the id of each element from (VLMAX-1). This will convert
10819// the indices like so:
10820// (0, 1,..., VLMAX-2, VLMAX-1) -> (VLMAX-1, VLMAX-2,..., 1, 0).
10821// TODO: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
10822SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op,
10823 SelectionDAG &DAG) const {
10824 SDLoc DL(Op);
10825 MVT VecVT = Op.getSimpleValueType();
10826 if (VecVT.getVectorElementType() == MVT::i1) {
10827 MVT WidenVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
10828 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, Op.getOperand(0));
10829 SDValue Op2 = DAG.getNode(ISD::VECTOR_REVERSE, DL, WidenVT, Op1);
10830 return DAG.getSetCC(DL, VecVT, Op2,
10831 DAG.getConstant(0, DL, Op2.getValueType()), ISD::SETNE);
10832 }
10833
10834 MVT ContainerVT = VecVT;
10835 SDValue Vec = Op.getOperand(0);
10836 if (VecVT.isFixedLengthVector()) {
10837 ContainerVT = getContainerForFixedLengthVector(VecVT);
10838 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10839 }
10840
10841 MVT XLenVT = Subtarget.getXLenVT();
10842 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
10843
10844 // On some uarchs vrgather.vv will read from every input register for each
10845 // output register, regardless of the indices. However to reverse a vector
10846 // each output register only needs to read from one register. So decompose it
10847 // into LMUL * M1 vrgather.vvs, so we get O(LMUL) performance instead of
10848 // O(LMUL^2).
10849 //
10850 // vsetvli a1, zero, e64, m4, ta, ma
10851 // vrgatherei16.vv v12, v8, v16
10852 // ->
10853 // vsetvli a1, zero, e64, m1, ta, ma
10854 // vrgather.vv v15, v8, v16
10855 // vrgather.vv v14, v9, v16
10856 // vrgather.vv v13, v10, v16
10857 // vrgather.vv v12, v11, v16
10858 if (ContainerVT.bitsGT(getLMUL1VT(ContainerVT)) &&
10859 ContainerVT.getVectorElementCount().isKnownMultipleOf(2)) {
10860 auto [Lo, Hi] = DAG.SplitVector(Vec, DL);
10861 Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, Lo.getSimpleValueType(), Lo);
10862 Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, Hi.getSimpleValueType(), Hi);
10863 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ContainerVT, Hi, Lo);
10864
10865 // Fixed length vectors might not fit exactly into their container, and so
10866 // leave a gap in the front of the vector after being reversed. Slide this
10867 // away.
10868 //
10869 // x x x x 3 2 1 0 <- v4i16 @ vlen=128
10870 // 0 1 2 3 x x x x <- reverse
10871 // x x x x 0 1 2 3 <- vslidedown.vx
10872 if (VecVT.isFixedLengthVector()) {
10873 SDValue Offset = DAG.getNode(
10874 ISD::SUB, DL, XLenVT,
10875 DAG.getElementCount(DL, XLenVT, ContainerVT.getVectorElementCount()),
10876 DAG.getElementCount(DL, XLenVT, VecVT.getVectorElementCount()));
10877 Concat =
10878 getVSlidedown(DAG, Subtarget, DL, ContainerVT,
10879 DAG.getUNDEF(ContainerVT), Concat, Offset, Mask, VL);
10880 Concat = convertFromScalableVector(VecVT, Concat, DAG, Subtarget);
10881 }
10882 return Concat;
10883 }
10884
10885 unsigned EltSize = ContainerVT.getScalarSizeInBits();
10886 unsigned MinSize = ContainerVT.getSizeInBits().getKnownMinValue();
10887 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
10888 unsigned MaxVLMAX =
10889 VecVT.isFixedLengthVector()
10890 ? VecVT.getVectorNumElements()
10891 : RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
10892
10893 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
10894 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
10895
10896 // If this is SEW=8 and VLMAX is potentially more than 256, we need
10897 // to use vrgatherei16.vv.
10898 if (MaxVLMAX > 256 && EltSize == 8) {
10899 // If this is LMUL=8, we have to split before can use vrgatherei16.vv.
10900 // Reverse each half, then reassemble them in reverse order.
10901 // NOTE: It's also possible that after splitting that VLMAX no longer
10902 // requires vrgatherei16.vv.
10903 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
10904 auto [Lo, Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
10905 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VecVT);
10906 Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
10907 Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
10908 // Reassemble the low and high pieces reversed.
10909 // FIXME: This is a CONCAT_VECTORS.
10910 SDValue Res =
10911 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, DAG.getUNDEF(VecVT), Hi,
10912 DAG.getVectorIdxConstant(0, DL));
10913 return DAG.getNode(
10914 ISD::INSERT_SUBVECTOR, DL, VecVT, Res, Lo,
10915 DAG.getVectorIdxConstant(LoVT.getVectorMinNumElements(), DL));
10916 }
10917
10918 // Just promote the int type to i16 which will double the LMUL.
10919 IntVT = MVT::getVectorVT(MVT::i16, ContainerVT.getVectorElementCount());
10920 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
10921 }
10922
10923 // At LMUL > 1, do the index computation in 16 bits to reduce register
10924 // pressure.
10925 if (IntVT.getScalarType().bitsGT(MVT::i16) &&
10926 IntVT.bitsGT(getLMUL1VT(IntVT))) {
10927 assert(isUInt<16>(MaxVLMAX - 1)); // Largest VLMAX is 65536 @ zvl65536b
10928 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
10929 IntVT = IntVT.changeVectorElementType(MVT::i16);
10930 }
10931
10932 // Calculate VLMAX-1 for the desired SEW.
10933 SDValue VLMinus1 = DAG.getNode(
10934 ISD::SUB, DL, XLenVT,
10935 DAG.getElementCount(DL, XLenVT, VecVT.getVectorElementCount()),
10936 DAG.getConstant(1, DL, XLenVT));
10937
10938 // Splat VLMAX-1 taking care to handle SEW==64 on RV32.
10939 bool IsRV32E64 =
10940 !Subtarget.is64Bit() && IntVT.getVectorElementType() == MVT::i64;
10941 SDValue SplatVL;
10942 if (!IsRV32E64)
10943 SplatVL = DAG.getSplatVector(IntVT, DL, VLMinus1);
10944 else
10945 SplatVL = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT, DAG.getUNDEF(IntVT),
10946 VLMinus1, DAG.getRegister(RISCV::X0, XLenVT));
10947
10948 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IntVT, Mask, VL);
10949 SDValue Indices = DAG.getNode(RISCVISD::SUB_VL, DL, IntVT, SplatVL, VID,
10950 DAG.getUNDEF(IntVT), Mask, VL);
10951
10952 SDValue Gather = DAG.getNode(GatherOpc, DL, ContainerVT, Vec, Indices,
10953 DAG.getUNDEF(ContainerVT), Mask, VL);
10954 if (VecVT.isFixedLengthVector())
10955 Gather = convertFromScalableVector(VecVT, Gather, DAG, Subtarget);
10956 return Gather;
10957}
10958
10959SDValue RISCVTargetLowering::lowerVECTOR_SPLICE(SDValue Op,
10960 SelectionDAG &DAG) const {
// vector_splice(V1, V2, Imm) produces V1's elements starting at Imm
// followed by V2's leading elements: lowered as a slidedown of V1 and a
// slideup of V2 into the vacated tail lanes.
10961 SDLoc DL(Op);
10962 SDValue V1 = Op.getOperand(0);
10963 SDValue V2 = Op.getOperand(1);
10964 MVT XLenVT = Subtarget.getXLenVT();
10965 MVT VecVT = Op.getSimpleValueType();
10966
10967 SDValue VLMax = computeVLMax(VecVT, DL, DAG);
10968
10969 int64_t ImmValue = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
// DownOffset = number of V1 elements dropped; UpOffset = first result lane
// filled from V2. The two always sum to VLMAX.
10970 SDValue DownOffset, UpOffset;
10971 if (ImmValue >= 0) {
10972 // The operand is a TargetConstant, we need to rebuild it as a regular
10973 // constant.
10974 DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
10975 UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, DownOffset);
10976 } else {
10977 // The operand is a TargetConstant, we need to rebuild it as a regular
10978 // constant rather than negating the original operand.
10979 UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
10980 DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, UpOffset);
10981 }
10982
10983 SDValue TrueMask = getAllOnesMask(VecVT, VLMax, DL, DAG);
10984
// Only the first UpOffset lanes of the slidedown are live, so use UpOffset
// as its VL.
10985 SDValue SlideDown =
10986 getVSlidedown(DAG, Subtarget, DL, VecVT, DAG.getUNDEF(VecVT), V1,
10987 DownOffset, TrueMask, UpOffset);
10988 return getVSlideup(DAG, Subtarget, DL, VecVT, SlideDown, V2, UpOffset,
10989 TrueMask, DAG.getRegister(RISCV::X0, XLenVT),
10991}
10992
10993SDValue
10994RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op,
10995 SelectionDAG &DAG) const {
10996 SDLoc DL(Op);
10997 auto *Load = cast<LoadSDNode>(Op);
10998
11000 Load->getMemoryVT(),
11001 *Load->getMemOperand()) &&
11002 "Expecting a correctly-aligned load");
11003
11004 MVT VT = Op.getSimpleValueType();
11005 MVT XLenVT = Subtarget.getXLenVT();
11006 MVT ContainerVT = getContainerForFixedLengthVector(VT);
11007
11008 // If we know the exact VLEN and our fixed length vector completely fills
11009 // the container, use a whole register load instead.
11010 const auto [MinVLMAX, MaxVLMAX] =
11011 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
11012 if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
11013 getLMUL1VT(ContainerVT).bitsLE(ContainerVT)) {
11014 MachineMemOperand *MMO = Load->getMemOperand();
11015 SDValue NewLoad =
11016 DAG.getLoad(ContainerVT, DL, Load->getChain(), Load->getBasePtr(),
11017 MMO->getPointerInfo(), MMO->getBaseAlign(), MMO->getFlags(),
11018 MMO->getAAInfo(), MMO->getRanges());
11019 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
11020 return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
11021 }
11022
// Otherwise emit a VL-limited unit-stride load (vle, or vlm for masks).
11023 SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
11024
11025 bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
11026 SDValue IntID = DAG.getTargetConstant(
11027 IsMaskOp ? Intrinsic::riscv_vlm : Intrinsic::riscv_vle, DL, XLenVT);
11028 SmallVector<SDValue, 4> Ops{Load->getChain(), IntID};
// vlm has no passthru operand.
11029 if (!IsMaskOp)
11030 Ops.push_back(DAG.getUNDEF(ContainerVT));
11031 Ops.push_back(Load->getBasePtr());
11032 Ops.push_back(VL);
11033 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
11034 SDValue NewLoad =
11036 Load->getMemoryVT(), Load->getMemOperand());
11037
11038 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
11039 return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
11040}
11041
11042SDValue
11043RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op,
11044 SelectionDAG &DAG) const {
11045 SDLoc DL(Op);
11046 auto *Store = cast<StoreSDNode>(Op);
11047
11049 Store->getMemoryVT(),
11050 *Store->getMemOperand()) &&
11051 "Expecting a correctly-aligned store");
11052
11053 SDValue StoreVal = Store->getValue();
11054 MVT VT = StoreVal.getSimpleValueType();
11055 MVT XLenVT = Subtarget.getXLenVT();
11056
11057 // If the size less than a byte, we need to pad with zeros to make a byte.
// vsm stores whole bytes, so sub-byte mask vectors are widened to v8i1
// with the padding lanes zeroed.
11058 if (VT.getVectorElementType() == MVT::i1 && VT.getVectorNumElements() < 8) {
11059 VT = MVT::v8i1;
11060 StoreVal =
11061 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getConstant(0, DL, VT),
11062 StoreVal, DAG.getVectorIdxConstant(0, DL));
11063 }
11064
11065 MVT ContainerVT = getContainerForFixedLengthVector(VT);
11066
11067 SDValue NewValue =
11068 convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
11069
11070 // If we know the exact VLEN and our fixed length vector completely fills
11071 // the container, use a whole register store instead.
11072 const auto [MinVLMAX, MaxVLMAX] =
11073 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
11074 if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
11075 getLMUL1VT(ContainerVT).bitsLE(ContainerVT)) {
11076 MachineMemOperand *MMO = Store->getMemOperand();
11077 return DAG.getStore(Store->getChain(), DL, NewValue, Store->getBasePtr(),
11078 MMO->getPointerInfo(), MMO->getBaseAlign(),
11079 MMO->getFlags(), MMO->getAAInfo());
11080 }
11081
// Otherwise emit a VL-limited unit-stride store (vse, or vsm for masks).
11082 SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
11083
11084 bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
11085 SDValue IntID = DAG.getTargetConstant(
11086 IsMaskOp ? Intrinsic::riscv_vsm : Intrinsic::riscv_vse, DL, XLenVT);
11087 return DAG.getMemIntrinsicNode(
11088 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other),
11089 {Store->getChain(), IntID, NewValue, Store->getBasePtr(), VL},
11090 Store->getMemoryVT(), Store->getMemOperand());
11091}
11092
11093SDValue RISCVTargetLowering::lowerMaskedLoad(SDValue Op,
11094 SelectionDAG &DAG) const {
11095 SDLoc DL(Op);
11096 MVT VT = Op.getSimpleValueType();
11097
11098 const auto *MemSD = cast<MemSDNode>(Op);
11099 EVT MemVT = MemSD->getMemoryVT();
11100 MachineMemOperand *MMO = MemSD->getMemOperand();
11101 SDValue Chain = MemSD->getChain();
11102 SDValue BasePtr = MemSD->getBasePtr();
11103
11104 SDValue Mask, PassThru, VL;
11105 if (const auto *VPLoad = dyn_cast<VPLoadSDNode>(Op)) {
11106 Mask = VPLoad->getMask();
11107 PassThru = DAG.getUNDEF(VT);
11108 VL = VPLoad->getVectorLength();
11109 } else {
11110 const auto *MLoad = cast<MaskedLoadSDNode>(Op);
11111 Mask = MLoad->getMask();
11112 PassThru = MLoad->getPassThru();
11113 }
11114
11115 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
11116
11117 MVT XLenVT = Subtarget.getXLenVT();
11118
11119 MVT ContainerVT = VT;
11120 if (VT.isFixedLengthVector()) {
11121 ContainerVT = getContainerForFixedLengthVector(VT);
11122 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
11123 if (!IsUnmasked) {
11124 MVT MaskVT = getMaskTypeFor(ContainerVT);
11125 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11126 }
11127 }
11128
11129 if (!VL)
11130 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
11131
11132 unsigned IntID =
11133 IsUnmasked ? Intrinsic::riscv_vle : Intrinsic::riscv_vle_mask;
11134 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
11135 if (IsUnmasked)
11136 Ops.push_back(DAG.getUNDEF(ContainerVT));
11137 else
11138 Ops.push_back(PassThru);
11139 Ops.push_back(BasePtr);
11140 if (!IsUnmasked)
11141 Ops.push_back(Mask);
11142 Ops.push_back(VL);
11143 if (!IsUnmasked)
11144 Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT));
11145
11146 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
11147
11148 SDValue Result =
11149 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
11150 Chain = Result.getValue(1);
11151
11152 if (VT.isFixedLengthVector())
11153 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
11154
11155 return DAG.getMergeValues({Result, Chain}, DL);
11156}
11157
11158SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op,
11159 SelectionDAG &DAG) const {
// Lowers masked.store, vp.store and masked.compressstore to riscv_vse
// intrinsics.
11160 SDLoc DL(Op);
11161
11162 const auto *MemSD = cast<MemSDNode>(Op);
11163 EVT MemVT = MemSD->getMemoryVT();
11164 MachineMemOperand *MMO = MemSD->getMemOperand();
11165 SDValue Chain = MemSD->getChain();
11166 SDValue BasePtr = MemSD->getBasePtr();
11167 SDValue Val, Mask, VL;
11168
// A plain masked store leaves VL empty; the default is computed below.
11169 bool IsCompressingStore = false;
11170 if (const auto *VPStore = dyn_cast<VPStoreSDNode>(Op)) {
11171 Val = VPStore->getValue();
11172 Mask = VPStore->getMask();
11173 VL = VPStore->getVectorLength();
11174 } else {
11175 const auto *MStore = cast<MaskedStoreSDNode>(Op);
11176 Val = MStore->getValue();
11177 Mask = MStore->getMask();
11178 IsCompressingStore = MStore->isCompressingStore();
11179 }
11180
// A compressing store becomes a vcompress followed by an unmasked store of
// the packed elements, so it counts as unmasked here.
11181 bool IsUnmasked =
11182 ISD::isConstantSplatVectorAllOnes(Mask.getNode()) || IsCompressingStore;
11183
11184 MVT VT = Val.getSimpleValueType();
11185 MVT XLenVT = Subtarget.getXLenVT();
11186
11187 MVT ContainerVT = VT;
11188 if (VT.isFixedLengthVector()) {
11189 ContainerVT = getContainerForFixedLengthVector(VT);
11190
11191 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
11192 if (!IsUnmasked || IsCompressingStore) {
11193 MVT MaskVT = getMaskTypeFor(ContainerVT);
11194 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11195 }
11196 }
11197
11198 if (!VL)
11199 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
11200
11201 if (IsCompressingStore) {
11202 Val = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ContainerVT,
11203 DAG.getConstant(Intrinsic::riscv_vcompress, DL, XLenVT),
11204 DAG.getUNDEF(ContainerVT), Val, Mask, VL);
// Only the packed elements are stored: VL = vcpop(mask).
11205 VL =
11206 DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Mask,
11207 getAllOnesMask(Mask.getSimpleValueType(), VL, DL, DAG), VL);
11208 }
11209
11210 unsigned IntID =
11211 IsUnmasked ? Intrinsic::riscv_vse : Intrinsic::riscv_vse_mask;
11212 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
11213 Ops.push_back(Val);
11214 Ops.push_back(BasePtr);
11215 if (!IsUnmasked)
11216 Ops.push_back(Mask);
11217 Ops.push_back(VL);
11218
11220 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
11221}
11222
11223SDValue
11224RISCVTargetLowering::lowerFixedLengthVectorSetccToRVV(SDValue Op,
11225 SelectionDAG &DAG) const {
11226 MVT InVT = Op.getOperand(0).getSimpleValueType();
11227 MVT ContainerVT = getContainerForFixedLengthVector(InVT);
11228
11229 MVT VT = Op.getSimpleValueType();
11230
11231 SDValue Op1 =
11232 convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
11233 SDValue Op2 =
11234 convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
11235
11236 SDLoc DL(Op);
11237 auto [Mask, VL] = getDefaultVLOps(VT.getVectorNumElements(), ContainerVT, DL,
11238 DAG, Subtarget);
11239 MVT MaskVT = getMaskTypeFor(ContainerVT);
11240
11241 SDValue Cmp =
11242 DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,
11243 {Op1, Op2, Op.getOperand(2), DAG.getUNDEF(MaskVT), Mask, VL});
11244
11245 return convertFromScalableVector(VT, Cmp, DAG, Subtarget);
11246}
11247
11248SDValue RISCVTargetLowering::lowerVectorStrictFSetcc(SDValue Op,
11249 SelectionDAG &DAG) const {
11250 unsigned Opc = Op.getOpcode();
11251 SDLoc DL(Op);
11252 SDValue Chain = Op.getOperand(0);
11253 SDValue Op1 = Op.getOperand(1);
11254 SDValue Op2 = Op.getOperand(2);
11255 SDValue CC = Op.getOperand(3);
11256 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
11257 MVT VT = Op.getSimpleValueType();
11258 MVT InVT = Op1.getSimpleValueType();
11259
11260 // RVV VMFEQ/VMFNE ignores qNan, so we expand strict_fsetccs with OEQ/UNE
11261 // condition code.
11262 if (Opc == ISD::STRICT_FSETCCS) {
11263 // Expand strict_fsetccs(x, oeq) to
11264 // (and strict_fsetccs(x, y, oge), strict_fsetccs(x, y, ole))
11265 SDVTList VTList = Op->getVTList();
11266 if (CCVal == ISD::SETEQ || CCVal == ISD::SETOEQ) {
11267 SDValue OLECCVal = DAG.getCondCode(ISD::SETOLE);
// x <= y && y <= x is equivalent to x == y and raises the invalid
// exception on any NaN input, which is what SETCCS requires.
11268 SDValue Tmp1 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
11269 Op2, OLECCVal);
11270 SDValue Tmp2 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op2,
11271 Op1, OLECCVal);
11272 SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
11273 Tmp1.getValue(1), Tmp2.getValue(1));
11274 // Tmp1 and Tmp2 might be the same node.
11275 if (Tmp1 != Tmp2)
11276 Tmp1 = DAG.getNode(ISD::AND, DL, VT, Tmp1, Tmp2);
11277 return DAG.getMergeValues({Tmp1, OutChain}, DL);
11278 }
11279
11280 // Expand (strict_fsetccs x, y, une) to (not (strict_fsetccs x, y, oeq))
11281 if (CCVal == ISD::SETNE || CCVal == ISD::SETUNE) {
11282 SDValue OEQCCVal = DAG.getCondCode(ISD::SETOEQ);
11283 SDValue OEQ = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
11284 Op2, OEQCCVal);
11285 SDValue Res = DAG.getNOT(DL, OEQ, VT);
11286 return DAG.getMergeValues({Res, OEQ.getValue(1)}, DL);
11287 }
11288 }
11289
11290 MVT ContainerInVT = InVT;
11291 if (InVT.isFixedLengthVector()) {
11292 ContainerInVT = getContainerForFixedLengthVector(InVT);
11293 Op1 = convertToScalableVector(ContainerInVT, Op1, DAG, Subtarget);
11294 Op2 = convertToScalableVector(ContainerInVT, Op2, DAG, Subtarget);
11295 }
11296 MVT MaskVT = getMaskTypeFor(ContainerInVT);
11297
11298 auto [Mask, VL] = getDefaultVLOps(InVT, ContainerInVT, DL, DAG, Subtarget);
11299
11300 SDValue Res;
11301 if (Opc == ISD::STRICT_FSETCC &&
11302 (CCVal == ISD::SETLT || CCVal == ISD::SETOLT || CCVal == ISD::SETLE ||
11303 CCVal == ISD::SETOLE)) {
11304 // VMFLT/VMFLE/VMFGT/VMFGE raise exception for qNan. Generate a mask to only
11305 // active when both input elements are ordered.
11306 SDValue True = getAllOnesMask(ContainerInVT, VL, DL, DAG);
// x == x (OEQ self-compare) is true exactly when x is not NaN; AND-ing the
// two self-compares yields a "both operands ordered" mask.
11307 SDValue OrderMask1 = DAG.getNode(
11308 RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
11309 {Chain, Op1, Op1, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
11310 True, VL});
11311 SDValue OrderMask2 = DAG.getNode(
11312 RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
11313 {Chain, Op2, Op2, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
11314 True, VL});
11315 Mask =
11316 DAG.getNode(RISCVISD::VMAND_VL, DL, MaskVT, OrderMask1, OrderMask2, VL);
11317 // Use Mask as the passthru operand to let the result be 0 if either of the
11318 // inputs is unordered.
11320 DAG.getVTList(MaskVT, MVT::Other),
11321 {Chain, Op1, Op2, CC, Mask, Mask, VL});
11322 } else {
11323 unsigned RVVOpc = Opc == ISD::STRICT_FSETCC ? RISCVISD::STRICT_FSETCC_VL
11325 Res = DAG.getNode(RVVOpc, DL, DAG.getVTList(MaskVT, MVT::Other),
11326 {Chain, Op1, Op2, CC, DAG.getUNDEF(MaskVT), Mask, VL});
11327 }
11328
11329 if (VT.isFixedLengthVector()) {
11330 SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
11331 return DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
11332 }
11333 return Res;
11334}
11335
11336// Lower vector ABS to smax(X, sub(0, X)).
11337SDValue RISCVTargetLowering::lowerABS(SDValue Op, SelectionDAG &DAG) const {
11338 SDLoc DL(Op);
11339 MVT VT = Op.getSimpleValueType();
11340 SDValue X = Op.getOperand(0);
11341
11342 assert((Op.getOpcode() == ISD::VP_ABS || VT.isFixedLengthVector()) &&
11343 "Unexpected type for ISD::ABS");
11344
11345 MVT ContainerVT = VT;
11346 if (VT.isFixedLengthVector()) {
11347 ContainerVT = getContainerForFixedLengthVector(VT);
11348 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
11349 }
11350
11351 SDValue Mask, VL;
11352 if (Op->getOpcode() == ISD::VP_ABS) {
11353 Mask = Op->getOperand(1);
11354 if (VT.isFixedLengthVector())
11355 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
11356 Subtarget);
11357 VL = Op->getOperand(2);
11358 } else
11359 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
11360
11361 SDValue SplatZero = DAG.getNode(
11362 RISCVISD::VMV_V_X_VL, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
11363 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
11364 SDValue NegX = DAG.getNode(RISCVISD::SUB_VL, DL, ContainerVT, SplatZero, X,
11365 DAG.getUNDEF(ContainerVT), Mask, VL);
11366 SDValue Max = DAG.getNode(RISCVISD::SMAX_VL, DL, ContainerVT, X, NegX,
11367 DAG.getUNDEF(ContainerVT), Mask, VL);
11368
11369 if (VT.isFixedLengthVector())
11370 Max = convertFromScalableVector(VT, Max, DAG, Subtarget);
11371 return Max;
11372}
11373
11374SDValue RISCVTargetLowering::lowerFixedLengthVectorFCOPYSIGNToRVV(
11375 SDValue Op, SelectionDAG &DAG) const {
11376 SDLoc DL(Op);
11377 MVT VT = Op.getSimpleValueType();
11378 SDValue Mag = Op.getOperand(0);
11379 SDValue Sign = Op.getOperand(1);
11380 assert(Mag.getValueType() == Sign.getValueType() &&
11381 "Can only handle COPYSIGN with matching types.");
11382
11383 MVT ContainerVT = getContainerForFixedLengthVector(VT);
11384 Mag = convertToScalableVector(ContainerVT, Mag, DAG, Subtarget);
11385 Sign = convertToScalableVector(ContainerVT, Sign, DAG, Subtarget);
11386
11387 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
11388
11389 SDValue CopySign = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Mag,
11390 Sign, DAG.getUNDEF(ContainerVT), Mask, VL);
11391
11392 return convertFromScalableVector(VT, CopySign, DAG, Subtarget);
11393}
11394
11395SDValue RISCVTargetLowering::lowerFixedLengthVectorSelectToRVV(
11396 SDValue Op, SelectionDAG &DAG) const {
11397 MVT VT = Op.getSimpleValueType();
11398 MVT ContainerVT = getContainerForFixedLengthVector(VT);
11399
11400 MVT I1ContainerVT =
11401 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
11402
11403 SDValue CC =
11404 convertToScalableVector(I1ContainerVT, Op.getOperand(0), DAG, Subtarget);
11405 SDValue Op1 =
11406 convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
11407 SDValue Op2 =
11408 convertToScalableVector(ContainerVT, Op.getOperand(2), DAG, Subtarget);
11409
11410 SDLoc DL(Op);
11411 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
11412
11413 SDValue Select = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, CC, Op1,
11414 Op2, DAG.getUNDEF(ContainerVT), VL);
11415
11416 return convertFromScalableVector(VT, Select, DAG, Subtarget);
11417}
11418
11419SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op,
11420 SelectionDAG &DAG) const {
11421 unsigned NewOpc = getRISCVVLOp(Op);
11422 bool HasPassthruOp = hasPassthruOp(NewOpc);
11423 bool HasMask = hasMaskOp(NewOpc);
11424
11425 MVT VT = Op.getSimpleValueType();
11426 MVT ContainerVT = getContainerForFixedLengthVector(VT);
11427
11428 // Create list of operands by converting existing ones to scalable types.
11430 for (const SDValue &V : Op->op_values()) {
11431 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
11432
11433 // Pass through non-vector operands.
11434 if (!V.getValueType().isVector()) {
11435 Ops.push_back(V);
11436 continue;
11437 }
11438
11439 // "cast" fixed length vector to a scalable vector.
11440 assert(useRVVForFixedLengthVectorVT(V.getSimpleValueType()) &&
11441 "Only fixed length vectors are supported!");
11442 Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
11443 }
11444
11445 SDLoc DL(Op);
11446 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
11447 if (HasPassthruOp)
11448 Ops.push_back(DAG.getUNDEF(ContainerVT));
11449 if (HasMask)
11450 Ops.push_back(Mask);
11451 Ops.push_back(VL);
11452
11453 // StrictFP operations have two result values. Their lowered result should
11454 // have same result count.
11455 if (Op->isStrictFPOpcode()) {
11456 SDValue ScalableRes =
11457 DAG.getNode(NewOpc, DL, DAG.getVTList(ContainerVT, MVT::Other), Ops,
11458 Op->getFlags());
11459 SDValue SubVec = convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
11460 return DAG.getMergeValues({SubVec, ScalableRes.getValue(1)}, DL);
11461 }
11462
11463 SDValue ScalableRes =
11464 DAG.getNode(NewOpc, DL, ContainerVT, Ops, Op->getFlags());
11465 return convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
11466}
11467
11468// Lower a VP_* ISD node to the corresponding RISCVISD::*_VL node:
11469// * Operands of each node are assumed to be in the same order.
11470// * The EVL operand is promoted from i32 to i64 on RV64.
11471// * Fixed-length vectors are converted to their scalable-vector container
11472// types.
11473SDValue RISCVTargetLowering::lowerVPOp(SDValue Op, SelectionDAG &DAG) const {
11474 unsigned RISCVISDOpc = getRISCVVLOp(Op);
11475 bool HasPassthruOp = hasPassthruOp(RISCVISDOpc);
11476
11477 SDLoc DL(Op);
11478 MVT VT = Op.getSimpleValueType();
11480
11481 MVT ContainerVT = VT;
11482 if (VT.isFixedLengthVector())
11483 ContainerVT = getContainerForFixedLengthVector(VT);
11484
11485 for (const auto &OpIdx : enumerate(Op->ops())) {
11486 SDValue V = OpIdx.value();
11487 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
11488 // Add dummy passthru value before the mask. Or if there isn't a mask,
11489 // before EVL.
11490 if (HasPassthruOp) {
11491 auto MaskIdx = ISD::getVPMaskIdx(Op.getOpcode());
11492 if (MaskIdx) {
11493 if (*MaskIdx == OpIdx.index())
11494 Ops.push_back(DAG.getUNDEF(ContainerVT));
11495 } else if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) ==
11496 OpIdx.index()) {
11497 if (Op.getOpcode() == ISD::VP_MERGE) {
11498 // For VP_MERGE, copy the false operand instead of an undef value.
11499 Ops.push_back(Ops.back());
11500 } else {
11501 assert(Op.getOpcode() == ISD::VP_SELECT);
11502 // For VP_SELECT, add an undef value.
11503 Ops.push_back(DAG.getUNDEF(ContainerVT));
11504 }
11505 }
11506 }
11507 // Pass through operands which aren't fixed-length vectors.
11508 if (!V.getValueType().isFixedLengthVector()) {
11509 Ops.push_back(V);
11510 continue;
11511 }
11512 // "cast" fixed length vector to a scalable vector.
11513 MVT OpVT = V.getSimpleValueType();
11514 MVT ContainerVT = getContainerForFixedLengthVector(OpVT);
11515 assert(useRVVForFixedLengthVectorVT(OpVT) &&
11516 "Only fixed length vectors are supported!");
11517 Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
11518 }
11519
11520 if (!VT.isFixedLengthVector())
11521 return DAG.getNode(RISCVISDOpc, DL, VT, Ops, Op->getFlags());
11522
11523 SDValue VPOp = DAG.getNode(RISCVISDOpc, DL, ContainerVT, Ops, Op->getFlags());
11524
11525 return convertFromScalableVector(VT, VPOp, DAG, Subtarget);
11526}
11527
11528SDValue RISCVTargetLowering::lowerVPExtMaskOp(SDValue Op,
11529 SelectionDAG &DAG) const {
11530 SDLoc DL(Op);
11531 MVT VT = Op.getSimpleValueType();
11532
11533 SDValue Src = Op.getOperand(0);
11534 // NOTE: Mask is dropped.
11535 SDValue VL = Op.getOperand(2);
11536
11537 MVT ContainerVT = VT;
11538 if (VT.isFixedLengthVector()) {
11539 ContainerVT = getContainerForFixedLengthVector(VT);
11540 MVT SrcVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
11541 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
11542 }
11543
11544 MVT XLenVT = Subtarget.getXLenVT();
11545 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
11546 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
11547 DAG.getUNDEF(ContainerVT), Zero, VL);
11548
11549 SDValue SplatValue = DAG.getSignedConstant(
11550 Op.getOpcode() == ISD::VP_ZERO_EXTEND ? 1 : -1, DL, XLenVT);
11551 SDValue Splat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
11552 DAG.getUNDEF(ContainerVT), SplatValue, VL);
11553
11554 SDValue Result = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Src, Splat,
11555 ZeroSplat, DAG.getUNDEF(ContainerVT), VL);
11556 if (!VT.isFixedLengthVector())
11557 return Result;
11558 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11559}
11560
11561SDValue RISCVTargetLowering::lowerVPSetCCMaskOp(SDValue Op,
11562 SelectionDAG &DAG) const {
11563 SDLoc DL(Op);
11564 MVT VT = Op.getSimpleValueType();
11565
11566 SDValue Op1 = Op.getOperand(0);
11567 SDValue Op2 = Op.getOperand(1);
11568 ISD::CondCode Condition = cast<CondCodeSDNode>(Op.getOperand(2))->get();
11569 // NOTE: Mask is dropped.
11570 SDValue VL = Op.getOperand(4);
11571
11572 MVT ContainerVT = VT;
11573 if (VT.isFixedLengthVector()) {
11574 ContainerVT = getContainerForFixedLengthVector(VT);
11575 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
11576 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
11577 }
11578
11580 SDValue AllOneMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
11581
11582 switch (Condition) {
11583 default:
11584 break;
11585 // X != Y --> (X^Y)
11586 case ISD::SETNE:
11587 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
11588 break;
11589 // X == Y --> ~(X^Y)
11590 case ISD::SETEQ: {
11591 SDValue Temp =
11592 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
11593 Result =
11594 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, AllOneMask, VL);
11595 break;
11596 }
11597 // X >s Y --> X == 0 & Y == 1 --> ~X & Y
11598 // X <u Y --> X == 0 & Y == 1 --> ~X & Y
11599 case ISD::SETGT:
11600 case ISD::SETULT: {
11601 SDValue Temp =
11602 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
11603 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Temp, Op2, VL);
11604 break;
11605 }
11606 // X <s Y --> X == 1 & Y == 0 --> ~Y & X
11607 // X >u Y --> X == 1 & Y == 0 --> ~Y & X
11608 case ISD::SETLT:
11609 case ISD::SETUGT: {
11610 SDValue Temp =
11611 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
11612 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Op1, Temp, VL);
11613 break;
11614 }
11615 // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
11616 // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
11617 case ISD::SETGE:
11618 case ISD::SETULE: {
11619 SDValue Temp =
11620 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
11621 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op2, VL);
11622 break;
11623 }
11624 // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
11625 // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
11626 case ISD::SETLE:
11627 case ISD::SETUGE: {
11628 SDValue Temp =
11629 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
11630 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op1, VL);
11631 break;
11632 }
11633 }
11634
11635 if (!VT.isFixedLengthVector())
11636 return Result;
11637 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11638}
11639
11640// Lower Floating-Point/Integer Type-Convert VP SDNodes
11641SDValue RISCVTargetLowering::lowerVPFPIntConvOp(SDValue Op,
11642 SelectionDAG &DAG) const {
11643 SDLoc DL(Op);
11644
11645 SDValue Src = Op.getOperand(0);
11646 SDValue Mask = Op.getOperand(1);
11647 SDValue VL = Op.getOperand(2);
11648 unsigned RISCVISDOpc = getRISCVVLOp(Op);
11649
11650 MVT DstVT = Op.getSimpleValueType();
11651 MVT SrcVT = Src.getSimpleValueType();
11652 if (DstVT.isFixedLengthVector()) {
11653 DstVT = getContainerForFixedLengthVector(DstVT);
11654 SrcVT = getContainerForFixedLengthVector(SrcVT);
11655 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
11656 MVT MaskVT = getMaskTypeFor(DstVT);
11657 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11658 }
11659
11660 unsigned DstEltSize = DstVT.getScalarSizeInBits();
11661 unsigned SrcEltSize = SrcVT.getScalarSizeInBits();
11662
11664 if (DstEltSize >= SrcEltSize) { // Single-width and widening conversion.
11665 if (SrcVT.isInteger()) {
11666 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
11667
11668 unsigned RISCVISDExtOpc = RISCVISDOpc == RISCVISD::SINT_TO_FP_VL
11671
11672 // Do we need to do any pre-widening before converting?
11673 if (SrcEltSize == 1) {
11674 MVT IntVT = DstVT.changeVectorElementTypeToInteger();
11675 MVT XLenVT = Subtarget.getXLenVT();
11676 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
11677 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
11678 DAG.getUNDEF(IntVT), Zero, VL);
11679 SDValue One = DAG.getSignedConstant(
11680 RISCVISDExtOpc == RISCVISD::VZEXT_VL ? 1 : -1, DL, XLenVT);
11681 SDValue OneSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
11682 DAG.getUNDEF(IntVT), One, VL);
11683 Src = DAG.getNode(RISCVISD::VMERGE_VL, DL, IntVT, Src, OneSplat,
11684 ZeroSplat, DAG.getUNDEF(IntVT), VL);
11685 } else if (DstEltSize > (2 * SrcEltSize)) {
11686 // Widen before converting.
11687 MVT IntVT = MVT::getVectorVT(MVT::getIntegerVT(DstEltSize / 2),
11688 DstVT.getVectorElementCount());
11689 Src = DAG.getNode(RISCVISDExtOpc, DL, IntVT, Src, Mask, VL);
11690 }
11691
11692 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
11693 } else {
11694 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
11695 "Wrong input/output vector types");
11696
11697 // Convert f16 to f32 then convert f32 to i64.
11698 if (DstEltSize > (2 * SrcEltSize)) {
11699 assert(SrcVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
11700 MVT InterimFVT =
11701 MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
11702 Src =
11703 DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterimFVT, Src, Mask, VL);
11704 }
11705
11706 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
11707 }
11708 } else { // Narrowing + Conversion
11709 if (SrcVT.isInteger()) {
11710 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
11711 // First do a narrowing convert to an FP type half the size, then round
11712 // the FP type to a small FP type if needed.
11713
11714 MVT InterimFVT = DstVT;
11715 if (SrcEltSize > (2 * DstEltSize)) {
11716 assert(SrcEltSize == (4 * DstEltSize) && "Unexpected types!");
11717 assert(DstVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
11718 InterimFVT = MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
11719 }
11720
11721 Result = DAG.getNode(RISCVISDOpc, DL, InterimFVT, Src, Mask, VL);
11722
11723 if (InterimFVT != DstVT) {
11724 Src = Result;
11725 Result = DAG.getNode(RISCVISD::FP_ROUND_VL, DL, DstVT, Src, Mask, VL);
11726 }
11727 } else {
11728 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
11729 "Wrong input/output vector types");
11730 // First do a narrowing conversion to an integer half the size, then
11731 // truncate if needed.
11732
11733 if (DstEltSize == 1) {
11734 // First convert to the same size integer, then convert to mask using
11735 // setcc.
11736 assert(SrcEltSize >= 16 && "Unexpected FP type!");
11737 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize),
11738 DstVT.getVectorElementCount());
11739 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
11740
11741 // Compare the integer result to 0. The integer should be 0 or 1/-1,
11742 // otherwise the conversion was undefined.
11743 MVT XLenVT = Subtarget.getXLenVT();
11744 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
11745 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterimIVT,
11746 DAG.getUNDEF(InterimIVT), SplatZero, VL);
11747 Result = DAG.getNode(RISCVISD::SETCC_VL, DL, DstVT,
11748 {Result, SplatZero, DAG.getCondCode(ISD::SETNE),
11749 DAG.getUNDEF(DstVT), Mask, VL});
11750 } else {
11751 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
11752 DstVT.getVectorElementCount());
11753
11754 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
11755
11756 while (InterimIVT != DstVT) {
11757 SrcEltSize /= 2;
11758 Src = Result;
11759 InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
11760 DstVT.getVectorElementCount());
11761 Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, InterimIVT,
11762 Src, Mask, VL);
11763 }
11764 }
11765 }
11766 }
11767
11768 MVT VT = Op.getSimpleValueType();
11769 if (!VT.isFixedLengthVector())
11770 return Result;
11771 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11772}
11773
// Lower VP_SPLICE: slide the first EVL1 elements of Op1 down by |Offset|,
// then slide Op2 up behind them, producing the spliced vector. Mask (i1)
// vectors are first widened to i8 0/1 vectors so the slides can operate on
// them, and narrowed back to i1 with a setcc at the end.
SDValue
RISCVTargetLowering::lowerVPSpliceExperimental(SDValue Op,
                                               SelectionDAG &DAG) const {
  SDLoc DL(Op);

  SDValue Op1 = Op.getOperand(0);
  SDValue Op2 = Op.getOperand(1);
  SDValue Offset = Op.getOperand(2);
  SDValue Mask = Op.getOperand(3);
  SDValue EVL1 = Op.getOperand(4);
  SDValue EVL2 = Op.getOperand(5);

  const MVT XLenVT = Subtarget.getXLenVT();
  MVT VT = Op.getSimpleValueType();
  MVT ContainerVT = VT;
  if (VT.isFixedLengthVector()) {
    // Promote fixed-length operands (and the mask) to scalable containers.
    ContainerVT = getContainerForFixedLengthVector(VT);
    Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
    Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
    MVT MaskVT = getMaskTypeFor(ContainerVT);
    Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
  }

  bool IsMaskVector = VT.getVectorElementType() == MVT::i1;
  if (IsMaskVector) {
    // i1 elements can't be slid directly; widen each operand to an i8
    // vector of 0/1 values via merge-of-splats under the i1 operand.
    ContainerVT = ContainerVT.changeVectorElementType(MVT::i8);

    // Expand input operands
    SDValue SplatOneOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
                                      DAG.getUNDEF(ContainerVT),
                                      DAG.getConstant(1, DL, XLenVT), EVL1);
    SDValue SplatZeroOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
                                       DAG.getUNDEF(ContainerVT),
                                       DAG.getConstant(0, DL, XLenVT), EVL1);
    Op1 = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Op1, SplatOneOp1,
                      SplatZeroOp1, DAG.getUNDEF(ContainerVT), EVL1);

    SDValue SplatOneOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
                                      DAG.getUNDEF(ContainerVT),
                                      DAG.getConstant(1, DL, XLenVT), EVL2);
    SDValue SplatZeroOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
                                       DAG.getUNDEF(ContainerVT),
                                       DAG.getConstant(0, DL, XLenVT), EVL2);
    Op2 = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Op2, SplatOneOp2,
                      SplatZeroOp2, DAG.getUNDEF(ContainerVT), EVL2);
  }

  // Positive offsets splice off the front of Op1; negative offsets insert
  // the last |Offset| elements of Op1 ahead of Op2.
  int64_t ImmValue = cast<ConstantSDNode>(Offset)->getSExtValue();
  SDValue DownOffset, UpOffset;
  if (ImmValue >= 0) {
    // The operand is a TargetConstant, we need to rebuild it as a regular
    // constant.
    DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
    UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, DownOffset);
  } else {
    // The operand is a TargetConstant, we need to rebuild it as a regular
    // constant rather than negating the original operand.
    UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
    DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, UpOffset);
  }

  SDValue SlideDown =
      getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
                    Op1, DownOffset, Mask, UpOffset);
  SDValue Result = getVSlideup(DAG, Subtarget, DL, ContainerVT, SlideDown, Op2,
                               UpOffset, Mask, EVL2, RISCVII::TAIL_AGNOSTIC);

  if (IsMaskVector) {
    // Truncate Result back to a mask vector (Result has same EVL as Op2)
    Result = DAG.getNode(
        RISCVISD::SETCC_VL, DL, ContainerVT.changeVectorElementType(MVT::i1),
        {Result, DAG.getConstant(0, DL, ContainerVT),
         DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(getMaskTypeFor(ContainerVT)),
         Mask, EVL2});
  }

  if (!VT.isFixedLengthVector())
    return Result;
  return convertFromScalableVector(VT, Result, DAG, Subtarget);
}
11854
11855SDValue RISCVTargetLowering::lowerVPSplatExperimental(SDValue Op,
11856 SelectionDAG &DAG) const {
11857 SDLoc DL(Op);
11858 SDValue Val = Op.getOperand(0);
11859 SDValue Mask = Op.getOperand(1);
11860 SDValue VL = Op.getOperand(2);
11861 MVT VT = Op.getSimpleValueType();
11862
11863 MVT ContainerVT = VT;
11864 if (VT.isFixedLengthVector()) {
11865 ContainerVT = getContainerForFixedLengthVector(VT);
11866 MVT MaskVT = getMaskTypeFor(ContainerVT);
11867 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11868 }
11869
11870 SDValue Result =
11871 lowerScalarSplat(SDValue(), Val, VL, ContainerVT, DL, DAG, Subtarget);
11872
11873 if (!VT.isFixedLengthVector())
11874 return Result;
11875 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11876}
11877
11878SDValue
11879RISCVTargetLowering::lowerVPReverseExperimental(SDValue Op,
11880 SelectionDAG &DAG) const {
11881 SDLoc DL(Op);
11882 MVT VT = Op.getSimpleValueType();
11883 MVT XLenVT = Subtarget.getXLenVT();
11884
11885 SDValue Op1 = Op.getOperand(0);
11886 SDValue Mask = Op.getOperand(1);
11887 SDValue EVL = Op.getOperand(2);
11888
11889 MVT ContainerVT = VT;
11890 if (VT.isFixedLengthVector()) {
11891 ContainerVT = getContainerForFixedLengthVector(VT);
11892 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
11893 MVT MaskVT = getMaskTypeFor(ContainerVT);
11894 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11895 }
11896
11897 MVT GatherVT = ContainerVT;
11898 MVT IndicesVT = ContainerVT.changeVectorElementTypeToInteger();
11899 // Check if we are working with mask vectors
11900 bool IsMaskVector = ContainerVT.getVectorElementType() == MVT::i1;
11901 if (IsMaskVector) {
11902 GatherVT = IndicesVT = ContainerVT.changeVectorElementType(MVT::i8);
11903
11904 // Expand input operand
11905 SDValue SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
11906 DAG.getUNDEF(IndicesVT),
11907 DAG.getConstant(1, DL, XLenVT), EVL);
11908 SDValue SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
11909 DAG.getUNDEF(IndicesVT),
11910 DAG.getConstant(0, DL, XLenVT), EVL);
11911 Op1 = DAG.getNode(RISCVISD::VMERGE_VL, DL, IndicesVT, Op1, SplatOne,
11912 SplatZero, DAG.getUNDEF(IndicesVT), EVL);
11913 }
11914
11915 unsigned EltSize = GatherVT.getScalarSizeInBits();
11916 unsigned MinSize = GatherVT.getSizeInBits().getKnownMinValue();
11917 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
11918 unsigned MaxVLMAX =
11919 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
11920
11921 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
11922 // If this is SEW=8 and VLMAX is unknown or more than 256, we need
11923 // to use vrgatherei16.vv.
11924 // TODO: It's also possible to use vrgatherei16.vv for other types to
11925 // decrease register width for the index calculation.
11926 // NOTE: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
11927 if (MaxVLMAX > 256 && EltSize == 8) {
11928 // If this is LMUL=8, we have to split before using vrgatherei16.vv.
11929 // Split the vector in half and reverse each half using a full register
11930 // reverse.
11931 // Swap the halves and concatenate them.
11932 // Slide the concatenated result by (VLMax - VL).
11933 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
11934 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(GatherVT);
11935 auto [Lo, Hi] = DAG.SplitVector(Op1, DL);
11936
11937 SDValue LoRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
11938 SDValue HiRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
11939
11940 // Reassemble the low and high pieces reversed.
11941 // NOTE: this Result is unmasked (because we do not need masks for
11942 // shuffles). If in the future this has to change, we can use a SELECT_VL
11943 // between Result and UNDEF using the mask originally passed to VP_REVERSE
11944 SDValue Result =
11945 DAG.getNode(ISD::CONCAT_VECTORS, DL, GatherVT, HiRev, LoRev);
11946
11947 // Slide off any elements from past EVL that were reversed into the low
11948 // elements.
11949 unsigned MinElts = GatherVT.getVectorMinNumElements();
11950 SDValue VLMax =
11951 DAG.getVScale(DL, XLenVT, APInt(XLenVT.getSizeInBits(), MinElts));
11952 SDValue Diff = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, EVL);
11953
11954 Result = getVSlidedown(DAG, Subtarget, DL, GatherVT,
11955 DAG.getUNDEF(GatherVT), Result, Diff, Mask, EVL);
11956
11957 if (IsMaskVector) {
11958 // Truncate Result back to a mask vector
11959 Result =
11960 DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
11961 {Result, DAG.getConstant(0, DL, GatherVT),
11963 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL});
11964 }
11965
11966 if (!VT.isFixedLengthVector())
11967 return Result;
11968 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11969 }
11970
11971 // Just promote the int type to i16 which will double the LMUL.
11972 IndicesVT = MVT::getVectorVT(MVT::i16, IndicesVT.getVectorElementCount());
11973 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
11974 }
11975
11976 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IndicesVT, Mask, EVL);
11977 SDValue VecLen =
11978 DAG.getNode(ISD::SUB, DL, XLenVT, EVL, DAG.getConstant(1, DL, XLenVT));
11979 SDValue VecLenSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
11980 DAG.getUNDEF(IndicesVT), VecLen, EVL);
11981 SDValue VRSUB = DAG.getNode(RISCVISD::SUB_VL, DL, IndicesVT, VecLenSplat, VID,
11982 DAG.getUNDEF(IndicesVT), Mask, EVL);
11983 SDValue Result = DAG.getNode(GatherOpc, DL, GatherVT, Op1, VRSUB,
11984 DAG.getUNDEF(GatherVT), Mask, EVL);
11985
11986 if (IsMaskVector) {
11987 // Truncate Result back to a mask vector
11988 Result = DAG.getNode(
11989 RISCVISD::SETCC_VL, DL, ContainerVT,
11990 {Result, DAG.getConstant(0, DL, GatherVT), DAG.getCondCode(ISD::SETNE),
11991 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL});
11992 }
11993
11994 if (!VT.isFixedLengthVector())
11995 return Result;
11996 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11997}
11998
11999SDValue RISCVTargetLowering::lowerLogicVPOp(SDValue Op,
12000 SelectionDAG &DAG) const {
12001 MVT VT = Op.getSimpleValueType();
12002 if (VT.getVectorElementType() != MVT::i1)
12003 return lowerVPOp(Op, DAG);
12004
12005 // It is safe to drop mask parameter as masked-off elements are undef.
12006 SDValue Op1 = Op->getOperand(0);
12007 SDValue Op2 = Op->getOperand(1);
12008 SDValue VL = Op->getOperand(3);
12009
12010 MVT ContainerVT = VT;
12011 const bool IsFixed = VT.isFixedLengthVector();
12012 if (IsFixed) {
12013 ContainerVT = getContainerForFixedLengthVector(VT);
12014 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
12015 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
12016 }
12017
12018 SDLoc DL(Op);
12019 SDValue Val = DAG.getNode(getRISCVVLOp(Op), DL, ContainerVT, Op1, Op2, VL);
12020 if (!IsFixed)
12021 return Val;
12022 return convertFromScalableVector(VT, Val, DAG, Subtarget);
12023}
12024
12025SDValue RISCVTargetLowering::lowerVPStridedLoad(SDValue Op,
12026 SelectionDAG &DAG) const {
12027 SDLoc DL(Op);
12028 MVT XLenVT = Subtarget.getXLenVT();
12029 MVT VT = Op.getSimpleValueType();
12030 MVT ContainerVT = VT;
12031 if (VT.isFixedLengthVector())
12032 ContainerVT = getContainerForFixedLengthVector(VT);
12033
12034 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
12035
12036 auto *VPNode = cast<VPStridedLoadSDNode>(Op);
12037 // Check if the mask is known to be all ones
12038 SDValue Mask = VPNode->getMask();
12039 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
12040
12041 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vlse
12042 : Intrinsic::riscv_vlse_mask,
12043 DL, XLenVT);
12044 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID,
12045 DAG.getUNDEF(ContainerVT), VPNode->getBasePtr(),
12046 VPNode->getStride()};
12047 if (!IsUnmasked) {
12048 if (VT.isFixedLengthVector()) {
12049 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
12050 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12051 }
12052 Ops.push_back(Mask);
12053 }
12054 Ops.push_back(VPNode->getVectorLength());
12055 if (!IsUnmasked) {
12056 SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
12057 Ops.push_back(Policy);
12058 }
12059
12060 SDValue Result =
12062 VPNode->getMemoryVT(), VPNode->getMemOperand());
12063 SDValue Chain = Result.getValue(1);
12064
12065 if (VT.isFixedLengthVector())
12066 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
12067
12068 return DAG.getMergeValues({Result, Chain}, DL);
12069}
12070
12071SDValue RISCVTargetLowering::lowerVPStridedStore(SDValue Op,
12072 SelectionDAG &DAG) const {
12073 SDLoc DL(Op);
12074 MVT XLenVT = Subtarget.getXLenVT();
12075
12076 auto *VPNode = cast<VPStridedStoreSDNode>(Op);
12077 SDValue StoreVal = VPNode->getValue();
12078 MVT VT = StoreVal.getSimpleValueType();
12079 MVT ContainerVT = VT;
12080 if (VT.isFixedLengthVector()) {
12081 ContainerVT = getContainerForFixedLengthVector(VT);
12082 StoreVal = convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
12083 }
12084
12085 // Check if the mask is known to be all ones
12086 SDValue Mask = VPNode->getMask();
12087 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
12088
12089 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vsse
12090 : Intrinsic::riscv_vsse_mask,
12091 DL, XLenVT);
12092 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID, StoreVal,
12093 VPNode->getBasePtr(), VPNode->getStride()};
12094 if (!IsUnmasked) {
12095 if (VT.isFixedLengthVector()) {
12096 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
12097 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12098 }
12099 Ops.push_back(Mask);
12100 }
12101 Ops.push_back(VPNode->getVectorLength());
12102
12103 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, VPNode->getVTList(),
12104 Ops, VPNode->getMemoryVT(),
12105 VPNode->getMemOperand());
12106}
12107
12108// Custom lower MGATHER/VP_GATHER to a legalized form for RVV. It will then be
12109// matched to a RVV indexed load. The RVV indexed load instructions only
12110// support the "unsigned unscaled" addressing mode; indices are implicitly
12111// zero-extended or truncated to XLEN and are treated as byte offsets. Any
12112// signed or scaled indexing is extended to the XLEN value type and scaled
12113// accordingly.
12114SDValue RISCVTargetLowering::lowerMaskedGather(SDValue Op,
12115 SelectionDAG &DAG) const {
12116 SDLoc DL(Op);
12117 MVT VT = Op.getSimpleValueType();
12118
12119 const auto *MemSD = cast<MemSDNode>(Op.getNode());
12120 EVT MemVT = MemSD->getMemoryVT();
12121 MachineMemOperand *MMO = MemSD->getMemOperand();
12122 SDValue Chain = MemSD->getChain();
12123 SDValue BasePtr = MemSD->getBasePtr();
12124
12125 [[maybe_unused]] ISD::LoadExtType LoadExtType;
12126 SDValue Index, Mask, PassThru, VL;
12127
12128 if (auto *VPGN = dyn_cast<VPGatherSDNode>(Op.getNode())) {
12129 Index = VPGN->getIndex();
12130 Mask = VPGN->getMask();
12131 PassThru = DAG.getUNDEF(VT);
12132 VL = VPGN->getVectorLength();
12133 // VP doesn't support extending loads.
12135 } else {
12136 // Else it must be a MGATHER.
12137 auto *MGN = cast<MaskedGatherSDNode>(Op.getNode());
12138 Index = MGN->getIndex();
12139 Mask = MGN->getMask();
12140 PassThru = MGN->getPassThru();
12141 LoadExtType = MGN->getExtensionType();
12142 }
12143
12144 MVT IndexVT = Index.getSimpleValueType();
12145 MVT XLenVT = Subtarget.getXLenVT();
12146
12148 "Unexpected VTs!");
12149 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
12150 // Targets have to explicitly opt-in for extending vector loads.
12151 assert(LoadExtType == ISD::NON_EXTLOAD &&
12152 "Unexpected extending MGATHER/VP_GATHER");
12153
12154 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
12155 // the selection of the masked intrinsics doesn't do this for us.
12156 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
12157
12158 MVT ContainerVT = VT;
12159 if (VT.isFixedLengthVector()) {
12160 ContainerVT = getContainerForFixedLengthVector(VT);
12161 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
12162 ContainerVT.getVectorElementCount());
12163
12164 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
12165
12166 if (!IsUnmasked) {
12167 MVT MaskVT = getMaskTypeFor(ContainerVT);
12168 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12169 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
12170 }
12171 }
12172
12173 if (!VL)
12174 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
12175
12176 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
12177 IndexVT = IndexVT.changeVectorElementType(XLenVT);
12178 Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
12179 }
12180
12181 unsigned IntID =
12182 IsUnmasked ? Intrinsic::riscv_vluxei : Intrinsic::riscv_vluxei_mask;
12183 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
12184 if (IsUnmasked)
12185 Ops.push_back(DAG.getUNDEF(ContainerVT));
12186 else
12187 Ops.push_back(PassThru);
12188 Ops.push_back(BasePtr);
12189 Ops.push_back(Index);
12190 if (!IsUnmasked)
12191 Ops.push_back(Mask);
12192 Ops.push_back(VL);
12193 if (!IsUnmasked)
12195
12196 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
12197 SDValue Result =
12198 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
12199 Chain = Result.getValue(1);
12200
12201 if (VT.isFixedLengthVector())
12202 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
12203
12204 return DAG.getMergeValues({Result, Chain}, DL);
12205}
12206
12207// Custom lower MSCATTER/VP_SCATTER to a legalized form for RVV. It will then be
12208// matched to a RVV indexed store. The RVV indexed store instructions only
12209// support the "unsigned unscaled" addressing mode; indices are implicitly
12210// zero-extended or truncated to XLEN and are treated as byte offsets. Any
12211// signed or scaled indexing is extended to the XLEN value type and scaled
12212// accordingly.
12213SDValue RISCVTargetLowering::lowerMaskedScatter(SDValue Op,
12214 SelectionDAG &DAG) const {
12215 SDLoc DL(Op);
12216 const auto *MemSD = cast<MemSDNode>(Op.getNode());
12217 EVT MemVT = MemSD->getMemoryVT();
12218 MachineMemOperand *MMO = MemSD->getMemOperand();
12219 SDValue Chain = MemSD->getChain();
12220 SDValue BasePtr = MemSD->getBasePtr();
12221
12222 [[maybe_unused]] bool IsTruncatingStore = false;
12223 SDValue Index, Mask, Val, VL;
12224
12225 if (auto *VPSN = dyn_cast<VPScatterSDNode>(Op.getNode())) {
12226 Index = VPSN->getIndex();
12227 Mask = VPSN->getMask();
12228 Val = VPSN->getValue();
12229 VL = VPSN->getVectorLength();
12230 // VP doesn't support truncating stores.
12231 IsTruncatingStore = false;
12232 } else {
12233 // Else it must be a MSCATTER.
12234 auto *MSN = cast<MaskedScatterSDNode>(Op.getNode());
12235 Index = MSN->getIndex();
12236 Mask = MSN->getMask();
12237 Val = MSN->getValue();
12238 IsTruncatingStore = MSN->isTruncatingStore();
12239 }
12240
12241 MVT VT = Val.getSimpleValueType();
12242 MVT IndexVT = Index.getSimpleValueType();
12243 MVT XLenVT = Subtarget.getXLenVT();
12244
12246 "Unexpected VTs!");
12247 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
12248 // Targets have to explicitly opt-in for extending vector loads and
12249 // truncating vector stores.
12250 assert(!IsTruncatingStore && "Unexpected truncating MSCATTER/VP_SCATTER");
12251
12252 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
12253 // the selection of the masked intrinsics doesn't do this for us.
12254 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
12255
12256 MVT ContainerVT = VT;
12257 if (VT.isFixedLengthVector()) {
12258 ContainerVT = getContainerForFixedLengthVector(VT);
12259 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
12260 ContainerVT.getVectorElementCount());
12261
12262 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
12263 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
12264
12265 if (!IsUnmasked) {
12266 MVT MaskVT = getMaskTypeFor(ContainerVT);
12267 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12268 }
12269 }
12270
12271 if (!VL)
12272 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
12273
12274 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
12275 IndexVT = IndexVT.changeVectorElementType(XLenVT);
12276 Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
12277 }
12278
12279 unsigned IntID =
12280 IsUnmasked ? Intrinsic::riscv_vsoxei : Intrinsic::riscv_vsoxei_mask;
12281 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
12282 Ops.push_back(Val);
12283 Ops.push_back(BasePtr);
12284 Ops.push_back(Index);
12285 if (!IsUnmasked)
12286 Ops.push_back(Mask);
12287 Ops.push_back(VL);
12288
12290 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
12291}
12292
12293SDValue RISCVTargetLowering::lowerGET_ROUNDING(SDValue Op,
12294 SelectionDAG &DAG) const {
12295 const MVT XLenVT = Subtarget.getXLenVT();
12296 SDLoc DL(Op);
12297 SDValue Chain = Op->getOperand(0);
12298 SDValue SysRegNo = DAG.getTargetConstant(
12299 RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT);
12300 SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other);
12301 SDValue RM = DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo);
12302
12303 // Encoding used for rounding mode in RISC-V differs from that used in
12304 // FLT_ROUNDS. To convert it the RISC-V rounding mode is used as an index in a
12305 // table, which consists of a sequence of 4-bit fields, each representing
12306 // corresponding FLT_ROUNDS mode.
12307 static const int Table =
12313
12314 SDValue Shift =
12315 DAG.getNode(ISD::SHL, DL, XLenVT, RM, DAG.getConstant(2, DL, XLenVT));
12316 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
12317 DAG.getConstant(Table, DL, XLenVT), Shift);
12318 SDValue Masked = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
12319 DAG.getConstant(7, DL, XLenVT));
12320
12321 return DAG.getMergeValues({Masked, Chain}, DL);
12322}
12323
12324SDValue RISCVTargetLowering::lowerSET_ROUNDING(SDValue Op,
12325 SelectionDAG &DAG) const {
12326 const MVT XLenVT = Subtarget.getXLenVT();
12327 SDLoc DL(Op);
12328 SDValue Chain = Op->getOperand(0);
12329 SDValue RMValue = Op->getOperand(1);
12330 SDValue SysRegNo = DAG.getTargetConstant(
12331 RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT);
12332
12333 // Encoding used for rounding mode in RISC-V differs from that used in
12334 // FLT_ROUNDS. To convert it the C rounding mode is used as an index in
12335 // a table, which consists of a sequence of 4-bit fields, each representing
12336 // corresponding RISC-V mode.
12337 static const unsigned Table =
12343
12344 RMValue = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, RMValue);
12345
12346 SDValue Shift = DAG.getNode(ISD::SHL, DL, XLenVT, RMValue,
12347 DAG.getConstant(2, DL, XLenVT));
12348 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
12349 DAG.getConstant(Table, DL, XLenVT), Shift);
12350 RMValue = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
12351 DAG.getConstant(0x7, DL, XLenVT));
12352 return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo,
12353 RMValue);
12354}
12355
12356SDValue RISCVTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
12357 SelectionDAG &DAG) const {
12359
12360 bool isRISCV64 = Subtarget.is64Bit();
12361 EVT PtrVT = getPointerTy(DAG.getDataLayout());
12362
12363 int FI = MF.getFrameInfo().CreateFixedObject(isRISCV64 ? 8 : 4, 0, false);
12364 return DAG.getFrameIndex(FI, PtrVT);
12365}
12366
12367// Returns the opcode of the target-specific SDNode that implements the 32-bit
12368// form of the given Opcode.
12369static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
12370 switch (Opcode) {
12371 default:
12372 llvm_unreachable("Unexpected opcode");
12373 case ISD::SHL:
12374 return RISCVISD::SLLW;
12375 case ISD::SRA:
12376 return RISCVISD::SRAW;
12377 case ISD::SRL:
12378 return RISCVISD::SRLW;
12379 case ISD::SDIV:
12380 return RISCVISD::DIVW;
12381 case ISD::UDIV:
12382 return RISCVISD::DIVUW;
12383 case ISD::UREM:
12384 return RISCVISD::REMUW;
12385 case ISD::ROTL:
12386 return RISCVISD::ROLW;
12387 case ISD::ROTR:
12388 return RISCVISD::RORW;
12389 }
12390}
12391
12392// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
12393// node. Because i8/i16/i32 isn't a legal type for RV64, these operations would
12394// otherwise be promoted to i64, making it difficult to select the
12395// SLLW/DIVUW/.../*W later one because the fact the operation was originally of
12396// type i8/i16/i32 is lost.
12398 unsigned ExtOpc = ISD::ANY_EXTEND) {
12399 SDLoc DL(N);
12400 RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
12401 SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
12402 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
12403 SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
12404 // ReplaceNodeResults requires we maintain the same type for the return value.
12405 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
12406}
12407
12408// Converts the given 32-bit operation to a i64 operation with signed extension
12409// semantic to reduce the signed extension instructions.
12411 SDLoc DL(N);
12412 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
12413 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12414 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
12415 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
12416 DAG.getValueType(MVT::i32));
12417 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
12418}
12419
12422 SelectionDAG &DAG) const {
12423 SDLoc DL(N);
12424 switch (N->getOpcode()) {
12425 default:
12426 llvm_unreachable("Don't know how to custom type legalize this operation!");
12429 case ISD::FP_TO_SINT:
12430 case ISD::FP_TO_UINT: {
12431 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12432 "Unexpected custom legalisation");
12433 bool IsStrict = N->isStrictFPOpcode();
12434 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT ||
12435 N->getOpcode() == ISD::STRICT_FP_TO_SINT;
12436 SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0);
12437 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
12439 if (!isTypeLegal(Op0.getValueType()))
12440 return;
12441 if (IsStrict) {
12442 SDValue Chain = N->getOperand(0);
12443 // In absense of Zfh, promote f16 to f32, then convert.
12444 if (Op0.getValueType() == MVT::f16 &&
12445 !Subtarget.hasStdExtZfhOrZhinx()) {
12446 Op0 = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
12447 {Chain, Op0});
12448 Chain = Op0.getValue(1);
12449 }
12450 unsigned Opc = IsSigned ? RISCVISD::STRICT_FCVT_W_RV64
12452 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
12453 SDValue Res = DAG.getNode(
12454 Opc, DL, VTs, Chain, Op0,
12455 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
12456 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12457 Results.push_back(Res.getValue(1));
12458 return;
12459 }
12460 // For bf16, or f16 in absense of Zfh, promote [b]f16 to f32 and then
12461 // convert.
12462 if ((Op0.getValueType() == MVT::f16 &&
12463 !Subtarget.hasStdExtZfhOrZhinx()) ||
12464 Op0.getValueType() == MVT::bf16)
12465 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
12466
12467 unsigned Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
12468 SDValue Res =
12469 DAG.getNode(Opc, DL, MVT::i64, Op0,
12470 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
12471 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12472 return;
12473 }
12474 // If the FP type needs to be softened, emit a library call using the 'si'
12475 // version. If we left it to default legalization we'd end up with 'di'. If
12476 // the FP type doesn't need to be softened just let generic type
12477 // legalization promote the result type.
12478 RTLIB::Libcall LC;
12479 if (IsSigned)
12480 LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0));
12481 else
12482 LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0));
12483 MakeLibCallOptions CallOptions;
12484 EVT OpVT = Op0.getValueType();
12485 CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true);
12486 SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
12487 SDValue Result;
12488 std::tie(Result, Chain) =
12489 makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain);
12490 Results.push_back(Result);
12491 if (IsStrict)
12492 Results.push_back(Chain);
12493 break;
12494 }
12495 case ISD::LROUND: {
12496 SDValue Op0 = N->getOperand(0);
12497 EVT Op0VT = Op0.getValueType();
12498 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
12500 if (!isTypeLegal(Op0VT))
12501 return;
12502
12503 // In absense of Zfh, promote f16 to f32, then convert.
12504 if (Op0.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx())
12505 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
12506
12507 SDValue Res =
12508 DAG.getNode(RISCVISD::FCVT_W_RV64, DL, MVT::i64, Op0,
12509 DAG.getTargetConstant(RISCVFPRndMode::RMM, DL, MVT::i64));
12510 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12511 return;
12512 }
12513 // If the FP type needs to be softened, emit a library call to lround. We'll
12514 // need to truncate the result. We assume any value that doesn't fit in i32
12515 // is allowed to return an unspecified value.
12516 RTLIB::Libcall LC =
12517 Op0.getValueType() == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
12518 MakeLibCallOptions CallOptions;
12519 EVT OpVT = Op0.getValueType();
12520 CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64, true);
12521 SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
12522 Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
12523 Results.push_back(Result);
12524 break;
12525 }
12526 case ISD::READCYCLECOUNTER:
12527 case ISD::READSTEADYCOUNTER: {
12528 assert(!Subtarget.is64Bit() && "READCYCLECOUNTER/READSTEADYCOUNTER only "
12529 "has custom type legalization on riscv32");
12530
12531 SDValue LoCounter, HiCounter;
12532 MVT XLenVT = Subtarget.getXLenVT();
12533 if (N->getOpcode() == ISD::READCYCLECOUNTER) {
12534 LoCounter = DAG.getTargetConstant(
12535 RISCVSysReg::lookupSysRegByName("CYCLE")->Encoding, DL, XLenVT);
12536 HiCounter = DAG.getTargetConstant(
12537 RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding, DL, XLenVT);
12538 } else {
12539 LoCounter = DAG.getTargetConstant(
12540 RISCVSysReg::lookupSysRegByName("TIME")->Encoding, DL, XLenVT);
12541 HiCounter = DAG.getTargetConstant(
12542 RISCVSysReg::lookupSysRegByName("TIMEH")->Encoding, DL, XLenVT);
12543 }
12544 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
12546 N->getOperand(0), LoCounter, HiCounter);
12547
12548 Results.push_back(
12549 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1)));
12550 Results.push_back(RCW.getValue(2));
12551 break;
12552 }
12553 case ISD::LOAD: {
12554 if (!ISD::isNON_EXTLoad(N))
12555 return;
12556
12557 // Use a SEXTLOAD instead of the default EXTLOAD. Similar to the
12558 // sext_inreg we emit for ADD/SUB/MUL/SLLI.
12560
12561 SDLoc dl(N);
12562 SDValue Res = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Ld->getChain(),
12563 Ld->getBasePtr(), Ld->getMemoryVT(),
12564 Ld->getMemOperand());
12565 Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Res));
12566 Results.push_back(Res.getValue(1));
12567 return;
12568 }
12569 case ISD::MUL: {
12570 unsigned Size = N->getSimpleValueType(0).getSizeInBits();
12571 unsigned XLen = Subtarget.getXLen();
12572 // This multiply needs to be expanded, try to use MULHSU+MUL if possible.
12573 if (Size > XLen) {
12574 assert(Size == (XLen * 2) && "Unexpected custom legalisation");
12575 SDValue LHS = N->getOperand(0);
12576 SDValue RHS = N->getOperand(1);
12577 APInt HighMask = APInt::getHighBitsSet(Size, XLen);
12578
12579 bool LHSIsU = DAG.MaskedValueIsZero(LHS, HighMask);
12580 bool RHSIsU = DAG.MaskedValueIsZero(RHS, HighMask);
12581 // We need exactly one side to be unsigned.
12582 if (LHSIsU == RHSIsU)
12583 return;
12584
12585 auto MakeMULPair = [&](SDValue S, SDValue U) {
12586 MVT XLenVT = Subtarget.getXLenVT();
12587 S = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, S);
12588 U = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, U);
12589 SDValue Lo = DAG.getNode(ISD::MUL, DL, XLenVT, S, U);
12590 SDValue Hi = DAG.getNode(RISCVISD::MULHSU, DL, XLenVT, S, U);
12591 return DAG.getNode(ISD::BUILD_PAIR, DL, N->getValueType(0), Lo, Hi);
12592 };
12593
12594 bool LHSIsS = DAG.ComputeNumSignBits(LHS) > XLen;
12595 bool RHSIsS = DAG.ComputeNumSignBits(RHS) > XLen;
12596
12597 // The other operand should be signed, but still prefer MULH when
12598 // possible.
12599 if (RHSIsU && LHSIsS && !RHSIsS)
12600 Results.push_back(MakeMULPair(LHS, RHS));
12601 else if (LHSIsU && RHSIsS && !LHSIsS)
12602 Results.push_back(MakeMULPair(RHS, LHS));
12603
12604 return;
12605 }
12606 [[fallthrough]];
12607 }
12608 case ISD::ADD:
12609 case ISD::SUB:
12610 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12611 "Unexpected custom legalisation");
12612 Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
12613 break;
12614 case ISD::SHL:
12615 case ISD::SRA:
12616 case ISD::SRL:
12617 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12618 "Unexpected custom legalisation");
12619 if (N->getOperand(1).getOpcode() != ISD::Constant) {
12620 // If we can use a BSET instruction, allow default promotion to apply.
12621 if (N->getOpcode() == ISD::SHL && Subtarget.hasStdExtZbs() &&
12622 isOneConstant(N->getOperand(0)))
12623 break;
12624 Results.push_back(customLegalizeToWOp(N, DAG));
12625 break;
12626 }
12627
12628 // Custom legalize ISD::SHL by placing a SIGN_EXTEND_INREG after. This is
12629 // similar to customLegalizeToWOpWithSExt, but we must zero_extend the
12630 // shift amount.
12631 if (N->getOpcode() == ISD::SHL) {
12632 SDLoc DL(N);
12633 SDValue NewOp0 =
12634 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
12635 SDValue NewOp1 =
12636 DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1));
12637 SDValue NewWOp = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0, NewOp1);
12638 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
12639 DAG.getValueType(MVT::i32));
12640 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
12641 }
12642
12643 break;
12644 case ISD::ROTL:
12645 case ISD::ROTR:
12646 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12647 "Unexpected custom legalisation");
12648 assert((Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
12649 Subtarget.hasVendorXTHeadBb()) &&
12650 "Unexpected custom legalization");
12651 if (!isa<ConstantSDNode>(N->getOperand(1)) &&
12652 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()))
12653 return;
12654 Results.push_back(customLegalizeToWOp(N, DAG));
12655 break;
12656 case ISD::CTTZ:
12658 case ISD::CTLZ:
12659 case ISD::CTLZ_ZERO_UNDEF: {
12660 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12661 "Unexpected custom legalisation");
12662
12663 SDValue NewOp0 =
12664 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
12665 bool IsCTZ =
12666 N->getOpcode() == ISD::CTTZ || N->getOpcode() == ISD::CTTZ_ZERO_UNDEF;
12667 unsigned Opc = IsCTZ ? RISCVISD::CTZW : RISCVISD::CLZW;
12668 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0);
12669 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12670 return;
12671 }
12672 case ISD::SDIV:
12673 case ISD::UDIV:
12674 case ISD::UREM: {
12675 MVT VT = N->getSimpleValueType(0);
12676 assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) &&
12677 Subtarget.is64Bit() && Subtarget.hasStdExtM() &&
12678 "Unexpected custom legalisation");
12679 // Don't promote division/remainder by constant since we should expand those
12680 // to multiply by magic constant.
12682 if (N->getOperand(1).getOpcode() == ISD::Constant &&
12683 !isIntDivCheap(N->getValueType(0), Attr))
12684 return;
12685
12686 // If the input is i32, use ANY_EXTEND since the W instructions don't read
12687 // the upper 32 bits. For other types we need to sign or zero extend
12688 // based on the opcode.
12689 unsigned ExtOpc = ISD::ANY_EXTEND;
12690 if (VT != MVT::i32)
12691 ExtOpc = N->getOpcode() == ISD::SDIV ? ISD::SIGN_EXTEND
12693
12694 Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc));
12695 break;
12696 }
12697 case ISD::SADDO: {
12698 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12699 "Unexpected custom legalisation");
12700
12701 // If the RHS is a constant, we can simplify ConditionRHS below. Otherwise
12702 // use the default legalization.
12703 if (!isa<ConstantSDNode>(N->getOperand(1)))
12704 return;
12705
12706 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
12707 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1));
12708 SDValue Res = DAG.getNode(ISD::ADD, DL, MVT::i64, LHS, RHS);
12709 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
12710 DAG.getValueType(MVT::i32));
12711
12712 SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
12713
12714 // For an addition, the result should be less than one of the operands (LHS)
12715 // if and only if the other operand (RHS) is negative, otherwise there will
12716 // be overflow.
12717 // For a subtraction, the result should be less than one of the operands
12718 // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
12719 // otherwise there will be overflow.
12720 EVT OType = N->getValueType(1);
12721 SDValue ResultLowerThanLHS = DAG.getSetCC(DL, OType, Res, LHS, ISD::SETLT);
12722 SDValue ConditionRHS = DAG.getSetCC(DL, OType, RHS, Zero, ISD::SETLT);
12723
12724 SDValue Overflow =
12725 DAG.getNode(ISD::XOR, DL, OType, ConditionRHS, ResultLowerThanLHS);
12726 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12727 Results.push_back(Overflow);
12728 return;
12729 }
12730 case ISD::UADDO:
12731 case ISD::USUBO: {
12732 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12733 "Unexpected custom legalisation");
12734 bool IsAdd = N->getOpcode() == ISD::UADDO;
12735 // Create an ADDW or SUBW.
12736 SDValue LHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
12737 SDValue RHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12738 SDValue Res =
12739 DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
12740 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
12741 DAG.getValueType(MVT::i32));
12742
12743 SDValue Overflow;
12744 if (IsAdd && isOneConstant(RHS)) {
12745 // Special case uaddo X, 1 overflowed if the addition result is 0.
12746 // The general case (X + C) < C is not necessarily beneficial. Although we
12747 // reduce the live range of X, we may introduce the materialization of
12748 // constant C, especially when the setcc result is used by branch. We have
12749 // no compare with constant and branch instructions.
12750 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res,
12751 DAG.getConstant(0, DL, MVT::i64), ISD::SETEQ);
12752 } else if (IsAdd && isAllOnesConstant(RHS)) {
12753 // Special case uaddo X, -1 overflowed if X != 0.
12754 Overflow = DAG.getSetCC(DL, N->getValueType(1), N->getOperand(0),
12755 DAG.getConstant(0, DL, MVT::i32), ISD::SETNE);
12756 } else {
12757 // Sign extend the LHS and perform an unsigned compare with the ADDW
12758 // result. Since the inputs are sign extended from i32, this is equivalent
12759 // to comparing the lower 32 bits.
12760 LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
12761 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res, LHS,
12762 IsAdd ? ISD::SETULT : ISD::SETUGT);
12763 }
12764
12765 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12766 Results.push_back(Overflow);
12767 return;
12768 }
12769 case ISD::UADDSAT:
12770 case ISD::USUBSAT: {
12771 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12772 !Subtarget.hasStdExtZbb() && "Unexpected custom legalisation");
12773 // Without Zbb, expand to UADDO/USUBO+select which will trigger our custom
12774 // promotion for UADDO/USUBO.
12775 Results.push_back(expandAddSubSat(N, DAG));
12776 return;
12777 }
12778 case ISD::SADDSAT:
12779 case ISD::SSUBSAT: {
12780 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12781 "Unexpected custom legalisation");
12782 Results.push_back(expandAddSubSat(N, DAG));
12783 return;
12784 }
12785 case ISD::ABS: {
12786 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12787 "Unexpected custom legalisation");
12788
12789 if (Subtarget.hasStdExtZbb()) {
12790 // Emit a special ABSW node that will be expanded to NEGW+MAX at isel.
12791 // This allows us to remember that the result is sign extended. Expanding
12792 // to NEGW+MAX here requires a Freeze which breaks ComputeNumSignBits.
12793 SDValue Src = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64,
12794 N->getOperand(0));
12795 SDValue Abs = DAG.getNode(RISCVISD::ABSW, DL, MVT::i64, Src);
12796 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Abs));
12797 return;
12798 }
12799
12800 // Expand abs to Y = (sraiw X, 31); subw(xor(X, Y), Y)
12801 SDValue Src = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
12802
12803 // Freeze the source so we can increase it's use count.
12804 Src = DAG.getFreeze(Src);
12805
12806 // Copy sign bit to all bits using the sraiw pattern.
12807 SDValue SignFill = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Src,
12808 DAG.getValueType(MVT::i32));
12809 SignFill = DAG.getNode(ISD::SRA, DL, MVT::i64, SignFill,
12810 DAG.getConstant(31, DL, MVT::i64));
12811
12812 SDValue NewRes = DAG.getNode(ISD::XOR, DL, MVT::i64, Src, SignFill);
12813 NewRes = DAG.getNode(ISD::SUB, DL, MVT::i64, NewRes, SignFill);
12814
12815 // NOTE: The result is only required to be anyextended, but sext is
12816 // consistent with type legalization of sub.
12817 NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewRes,
12818 DAG.getValueType(MVT::i32));
12819 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
12820 return;
12821 }
12822 case ISD::BITCAST: {
12823 EVT VT = N->getValueType(0);
12824 assert(VT.isInteger() && !VT.isVector() && "Unexpected VT!");
12825 SDValue Op0 = N->getOperand(0);
12826 EVT Op0VT = Op0.getValueType();
12827 MVT XLenVT = Subtarget.getXLenVT();
12828 if (VT == MVT::i16 &&
12829 ((Op0VT == MVT::f16 && Subtarget.hasStdExtZfhminOrZhinxmin()) ||
12830 (Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()))) {
12831 SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0);
12832 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
12833 } else if (VT == MVT::i32 && Op0VT == MVT::f32 && Subtarget.is64Bit() &&
12834 Subtarget.hasStdExtFOrZfinx()) {
12835 SDValue FPConv =
12836 DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0);
12837 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv));
12838 } else if (VT == MVT::i64 && Op0VT == MVT::f64 && XLenVT == MVT::i32) {
12839 SDValue NewReg = DAG.getNode(RISCVISD::SplitF64, DL,
12840 DAG.getVTList(MVT::i32, MVT::i32), Op0);
12841 SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
12842 NewReg.getValue(0), NewReg.getValue(1));
12843 Results.push_back(RetReg);
12844 } else if (!VT.isVector() && Op0VT.isFixedLengthVector() &&
12845 isTypeLegal(Op0VT)) {
12846 // Custom-legalize bitcasts from fixed-length vector types to illegal
12847 // scalar types in order to improve codegen. Bitcast the vector to a
12848 // one-element vector type whose element type is the same as the result
12849 // type, and extract the first element.
12850 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
12851 if (isTypeLegal(BVT)) {
12852 SDValue BVec = DAG.getBitcast(BVT, Op0);
12853 Results.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
12854 DAG.getVectorIdxConstant(0, DL)));
12855 }
12856 }
12857 break;
12858 }
12859 case RISCVISD::BREV8:
12860 case RISCVISD::ORC_B: {
12861 MVT VT = N->getSimpleValueType(0);
12862 MVT XLenVT = Subtarget.getXLenVT();
12863 assert((VT == MVT::i16 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
12864 "Unexpected custom legalisation");
12865 assert(((N->getOpcode() == RISCVISD::BREV8 && Subtarget.hasStdExtZbkb()) ||
12866 (N->getOpcode() == RISCVISD::ORC_B && Subtarget.hasStdExtZbb())) &&
12867 "Unexpected extension");
12868 SDValue NewOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, N->getOperand(0));
12869 SDValue NewRes = DAG.getNode(N->getOpcode(), DL, XLenVT, NewOp);
12870 // ReplaceNodeResults requires we maintain the same type for the return
12871 // value.
12872 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NewRes));
12873 break;
12874 }
12876 // Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element
12877 // type is illegal (currently only vXi64 RV32).
12878 // With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are
12879 // transferred to the destination register. We issue two of these from the
12880 // upper- and lower- halves of the SEW-bit vector element, slid down to the
12881 // first element.
12882 SDValue Vec = N->getOperand(0);
12883 SDValue Idx = N->getOperand(1);
12884
12885 // The vector type hasn't been legalized yet so we can't issue target
12886 // specific nodes if it needs legalization.
12887 // FIXME: We would manually legalize if it's important.
12888 if (!isTypeLegal(Vec.getValueType()))
12889 return;
12890
12891 MVT VecVT = Vec.getSimpleValueType();
12892
12893 assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 &&
12894 VecVT.getVectorElementType() == MVT::i64 &&
12895 "Unexpected EXTRACT_VECTOR_ELT legalization");
12896
12897 // If this is a fixed vector, we need to convert it to a scalable vector.
12898 MVT ContainerVT = VecVT;
12899 if (VecVT.isFixedLengthVector()) {
12900 ContainerVT = getContainerForFixedLengthVector(VecVT);
12901 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
12902 }
12903
12904 MVT XLenVT = Subtarget.getXLenVT();
12905
12906 // Use a VL of 1 to avoid processing more elements than we need.
12907 auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
12908
12909 // Unless the index is known to be 0, we must slide the vector down to get
12910 // the desired element into index 0.
12911 if (!isNullConstant(Idx)) {
12912 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
12913 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
12914 }
12915
12916 // Extract the lower XLEN bits of the correct vector element.
12917 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
12918
12919 // To extract the upper XLEN bits of the vector element, shift the first
12920 // element right by 32 bits and re-extract the lower XLEN bits.
12921 SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
12922 DAG.getUNDEF(ContainerVT),
12923 DAG.getConstant(32, DL, XLenVT), VL);
12924 SDValue LShr32 =
12925 DAG.getNode(RISCVISD::SRL_VL, DL, ContainerVT, Vec, ThirtyTwoV,
12926 DAG.getUNDEF(ContainerVT), Mask, VL);
12927
12928 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
12929
12930 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
12931 break;
12932 }
12934 unsigned IntNo = N->getConstantOperandVal(0);
12935 switch (IntNo) {
12936 default:
12938 "Don't know how to custom type legalize this intrinsic!");
12939 case Intrinsic::experimental_get_vector_length: {
12940 SDValue Res = lowerGetVectorLength(N, DAG, Subtarget);
12941 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12942 return;
12943 }
12944 case Intrinsic::experimental_cttz_elts: {
12945 SDValue Res = lowerCttzElts(N, DAG, Subtarget);
12946 Results.push_back(
12947 DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res));
12948 return;
12949 }
12950 case Intrinsic::riscv_orc_b:
12951 case Intrinsic::riscv_brev8:
12952 case Intrinsic::riscv_sha256sig0:
12953 case Intrinsic::riscv_sha256sig1:
12954 case Intrinsic::riscv_sha256sum0:
12955 case Intrinsic::riscv_sha256sum1:
12956 case Intrinsic::riscv_sm3p0:
12957 case Intrinsic::riscv_sm3p1: {
12958 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
12959 return;
12960 unsigned Opc;
12961 switch (IntNo) {
12962 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
12963 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
12964 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
12965 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
12966 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
12967 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
12968 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
12969 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
12970 }
12971
12972 SDValue NewOp =
12973 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12974 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp);
12975 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12976 return;
12977 }
12978 case Intrinsic::riscv_sm4ks:
12979 case Intrinsic::riscv_sm4ed: {
12980 unsigned Opc =
12981 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
12982 SDValue NewOp0 =
12983 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12984 SDValue NewOp1 =
12985 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
12986 SDValue Res =
12987 DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, N->getOperand(3));
12988 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12989 return;
12990 }
12991 case Intrinsic::riscv_mopr: {
12992 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
12993 return;
12994 SDValue NewOp =
12995 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12996 SDValue Res = DAG.getNode(
12997 RISCVISD::MOPR, DL, MVT::i64, NewOp,
12998 DAG.getTargetConstant(N->getConstantOperandVal(2), DL, MVT::i64));
12999 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13000 return;
13001 }
13002 case Intrinsic::riscv_moprr: {
13003 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
13004 return;
13005 SDValue NewOp0 =
13006 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
13007 SDValue NewOp1 =
13008 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
13009 SDValue Res = DAG.getNode(
13010 RISCVISD::MOPRR, DL, MVT::i64, NewOp0, NewOp1,
13011 DAG.getTargetConstant(N->getConstantOperandVal(3), DL, MVT::i64));
13012 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13013 return;
13014 }
13015 case Intrinsic::riscv_clmul: {
13016 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
13017 return;
13018
13019 SDValue NewOp0 =
13020 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
13021 SDValue NewOp1 =
13022 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
13023 SDValue Res = DAG.getNode(RISCVISD::CLMUL, DL, MVT::i64, NewOp0, NewOp1);
13024 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13025 return;
13026 }
13027 case Intrinsic::riscv_clmulh:
13028 case Intrinsic::riscv_clmulr: {
13029 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
13030 return;
13031
13032 // Extend inputs to XLen, and shift by 32. This will add 64 trailing zeros
13033 // to the full 128-bit clmul result of multiplying two xlen values.
13034 // Perform clmulr or clmulh on the shifted values. Finally, extract the
13035 // upper 32 bits.
13036 //
13037 // The alternative is to mask the inputs to 32 bits and use clmul, but
13038 // that requires two shifts to mask each input without zext.w.
13039 // FIXME: If the inputs are known zero extended or could be freely
13040 // zero extended, the mask form would be better.
13041 SDValue NewOp0 =
13042 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
13043 SDValue NewOp1 =
13044 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
13045 NewOp0 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0,
13046 DAG.getConstant(32, DL, MVT::i64));
13047 NewOp1 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp1,
13048 DAG.getConstant(32, DL, MVT::i64));
13049 unsigned Opc = IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH
13051 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1);
13052 Res = DAG.getNode(ISD::SRL, DL, MVT::i64, Res,
13053 DAG.getConstant(32, DL, MVT::i64));
13054 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13055 return;
13056 }
13057 case Intrinsic::riscv_vmv_x_s: {
13058 EVT VT = N->getValueType(0);
13059 MVT XLenVT = Subtarget.getXLenVT();
13060 if (VT.bitsLT(XLenVT)) {
13061 // Simple case just extract using vmv.x.s and truncate.
13062 SDValue Extract = DAG.getNode(RISCVISD::VMV_X_S, DL,
13063 Subtarget.getXLenVT(), N->getOperand(1));
13064 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Extract));
13065 return;
13066 }
13067
13068 assert(VT == MVT::i64 && !Subtarget.is64Bit() &&
13069 "Unexpected custom legalization");
13070
13071 // We need to do the move in two steps.
13072 SDValue Vec = N->getOperand(1);
13073 MVT VecVT = Vec.getSimpleValueType();
13074
13075 // First extract the lower XLEN bits of the element.
13076 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
13077
13078 // To extract the upper XLEN bits of the vector element, shift the first
13079 // element right by 32 bits and re-extract the lower XLEN bits.
13080 auto [Mask, VL] = getDefaultVLOps(1, VecVT, DL, DAG, Subtarget);
13081
13082 SDValue ThirtyTwoV =
13083 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, DAG.getUNDEF(VecVT),
13084 DAG.getConstant(32, DL, XLenVT), VL);
13085 SDValue LShr32 = DAG.getNode(RISCVISD::SRL_VL, DL, VecVT, Vec, ThirtyTwoV,
13086 DAG.getUNDEF(VecVT), Mask, VL);
13087 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
13088
13089 Results.push_back(
13090 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
13091 break;
13092 }
13093 }
13094 break;
13095 }
13096 case ISD::VECREDUCE_ADD:
13097 case ISD::VECREDUCE_AND:
13098 case ISD::VECREDUCE_OR:
13099 case ISD::VECREDUCE_XOR:
13100 case ISD::VECREDUCE_SMAX:
13101 case ISD::VECREDUCE_UMAX:
13102 case ISD::VECREDUCE_SMIN:
13103 case ISD::VECREDUCE_UMIN:
13104 if (SDValue V = lowerVECREDUCE(SDValue(N, 0), DAG))
13105 Results.push_back(V);
13106 break;
13107 case ISD::VP_REDUCE_ADD:
13108 case ISD::VP_REDUCE_AND:
13109 case ISD::VP_REDUCE_OR:
13110 case ISD::VP_REDUCE_XOR:
13111 case ISD::VP_REDUCE_SMAX:
13112 case ISD::VP_REDUCE_UMAX:
13113 case ISD::VP_REDUCE_SMIN:
13114 case ISD::VP_REDUCE_UMIN:
13115 if (SDValue V = lowerVPREDUCE(SDValue(N, 0), DAG))
13116 Results.push_back(V);
13117 break;
13118 case ISD::GET_ROUNDING: {
13119 SDVTList VTs = DAG.getVTList(Subtarget.getXLenVT(), MVT::Other);
13120 SDValue Res = DAG.getNode(ISD::GET_ROUNDING, DL, VTs, N->getOperand(0));
13121 Results.push_back(Res.getValue(0));
13122 Results.push_back(Res.getValue(1));
13123 break;
13124 }
13125 }
13126}
13127
13128/// Given a binary operator, return the *associative* generic ISD::VECREDUCE_OP
13129/// which corresponds to it.
13130static unsigned getVecReduceOpcode(unsigned Opc) {
13131 switch (Opc) {
13132 default:
13133 llvm_unreachable("Unhandled binary to transfrom reduction");
13134 case ISD::ADD:
13135 return ISD::VECREDUCE_ADD;
13136 case ISD::UMAX:
13137 return ISD::VECREDUCE_UMAX;
13138 case ISD::SMAX:
13139 return ISD::VECREDUCE_SMAX;
13140 case ISD::UMIN:
13141 return ISD::VECREDUCE_UMIN;
13142 case ISD::SMIN:
13143 return ISD::VECREDUCE_SMIN;
13144 case ISD::AND:
13145 return ISD::VECREDUCE_AND;
13146 case ISD::OR:
13147 return ISD::VECREDUCE_OR;
13148 case ISD::XOR:
13149 return ISD::VECREDUCE_XOR;
13150 case ISD::FADD:
13151 // Note: This is the associative form of the generic reduction opcode.
13152 return ISD::VECREDUCE_FADD;
13153 }
13154}
13155
13156 /// Perform two related transforms whose purpose is to incrementally recognize
13157 /// an explode_vector followed by scalar reduction as a vector reduction node.
13158 /// This exists to recover from a deficiency in SLP which can't handle
13159 /// forests with multiple roots sharing common nodes. In some cases, one
13160 /// of the trees will be vectorized, and the other will remain (unprofitably)
13161 /// scalarized.
// NOTE(review): this listing is missing the line carrying the function name and
// leading parameters (orig. line 13163). The caller in performADDCombine below
// invokes it as combineBinOpOfExtractToReduceTree(N, DAG, Subtarget), so the
// signature is presumably (SDNode *N, SelectionDAG &DAG, ...) -- confirm
// against upstream before editing.
13162 static SDValue
13164 const RISCVSubtarget &Subtarget) {
13165
// This transform needs to run before all integer types have been legalized
13166 // This transforms need to run before all integer types have been legalized
13167 // to i64 (so that the vector element type matches the add type), and while
13168 // it's safe to introduce odd sized vector types.
// NOTE(review): the guard condition itself (orig. line 13169) is missing from
// this extraction; only its early-return survives below.
13170 return SDValue();
13171
13172 // Without V, this transform isn't useful. We could form the (illegal)
13173 // operations and let them be scalarized again, but there's really no point.
13174 if (!Subtarget.hasVInstructions())
13175 return SDValue();
13176
13177 const SDLoc DL(N);
13178 const EVT VT = N->getValueType(0);
13179 const unsigned Opc = N->getOpcode();
13180
13181 // For FADD, we only handle the case with reassociation allowed. We
13182 // could handle strict reduction order, but at the moment, there's no
13183 // known reason to, and the complexity isn't worth it.
13184 // TODO: Handle fminnum and fmaxnum here
13185 if (!VT.isInteger() &&
13186 (Opc != ISD::FADD || !N->getFlags().hasAllowReassociation()))
13187 return SDValue();
13188
13189 const unsigned ReduceOpc = getVecReduceOpcode(Opc);
13190 assert(Opc == ISD::getVecReduceBaseOpcode(ReduceOpc) &&
13191 "Inconsistent mappings");
13192 SDValue LHS = N->getOperand(0);
13193 SDValue RHS = N->getOperand(1);
13194
13195 if (!LHS.hasOneUse() || !RHS.hasOneUse())
13196 return SDValue();
13197
// Canonicalize so RHS is the extract_vector_elt (if either one is).
13198 if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
13199 std::swap(LHS, RHS);
13200
13201 if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
13202 !isa<ConstantSDNode>(RHS.getOperand(1)))
13203 return SDValue();
13204
13205 uint64_t RHSIdx = cast<ConstantSDNode>(RHS.getOperand(1))->getLimitedValue();
13206 SDValue SrcVec = RHS.getOperand(0);
13207 EVT SrcVecVT = SrcVec.getValueType();
13208 assert(SrcVecVT.getVectorElementType() == VT);
13209 if (SrcVecVT.isScalableVector())
13210 return SDValue();
13211
13212 if (SrcVecVT.getScalarSizeInBits() > Subtarget.getELen())
13213 return SDValue();
13214
13215 // match binop (extract_vector_elt V, 0), (extract_vector_elt V, 1) to
13216 // reduce_op (extract_subvector [2 x VT] from V). This will form the
13217 // root of our reduction tree. TODO: We could extend this to any two
13218 // adjacent aligned constant indices if desired.
13219 if (LHS.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
13220 LHS.getOperand(0) == SrcVec && isa<ConstantSDNode>(LHS.getOperand(1))) {
13221 uint64_t LHSIdx =
13222 cast<ConstantSDNode>(LHS.getOperand(1))->getLimitedValue();
13223 if (0 == std::min(LHSIdx, RHSIdx) && 1 == std::max(LHSIdx, RHSIdx)) {
13224 EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, 2);
13225 SDValue Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ReduceVT, SrcVec,
13226 DAG.getVectorIdxConstant(0, DL));
13227 return DAG.getNode(ReduceOpc, DL, VT, Vec, N->getFlags());
13228 }
13229 }
13230
13231 // Match (binop (reduce (extract_subvector V, 0),
13232 // (extract_vector_elt V, sizeof(SubVec))))
13233 // into a reduction of one more element from the original vector V.
13234 if (LHS.getOpcode() != ReduceOpc)
13235 return SDValue();
13236
13237 SDValue ReduceVec = LHS.getOperand(0);
13238 if (ReduceVec.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
13239 ReduceVec.hasOneUse() && ReduceVec.getOperand(0) == RHS.getOperand(0) &&
13240 isNullConstant(ReduceVec.getOperand(1)) &&
13241 ReduceVec.getValueType().getVectorNumElements() == RHSIdx) {
13242 // For illegal types (e.g. 3xi32), most will be combined again into a
13243 // wider (hopefully legal) type. If this is a terminal state, we are
13244 // relying on type legalization here to produce something reasonable
13245 // and this lowering quality could probably be improved. (TODO)
13246 EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, RHSIdx + 1);
13247 SDValue Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ReduceVT, SrcVec,
13248 DAG.getVectorIdxConstant(0, DL));
// Keep only flags valid for both the existing reduce and the binop.
13249 auto Flags = ReduceVec->getFlags();
13250 Flags.intersectWith(N->getFlags());
13251 return DAG.getNode(ReduceOpc, DL, VT, Vec, Flags);
13252 }
13253
13254 return SDValue();
13255}
13256
13257
13258 // Try to fold (<bop> x, (reduction.<bop> vec, start))
// NOTE(review): the signature line (orig. 13259) is missing from this
// extraction. The caller in performADDCombine below invokes this as
// combineBinOpToReduce(N, DAG, Subtarget) -- confirm against upstream.
13260 const RISCVSubtarget &Subtarget) {
// Map a scalar binary opcode to the corresponding RVV reduction node.
// NOTE(review): every "return RISCVISD::VECREDUCE_*_VL;" line of this switch
// (orig. even lines 13266..13286) was dropped by the extraction; only the
// case labels remain. Restore from upstream before modifying.
13261 auto BinOpToRVVReduce = [](unsigned Opc) {
13262 switch (Opc) {
13263 default:
13264 llvm_unreachable("Unhandled binary to transfrom reduction");
13265 case ISD::ADD:
13267 case ISD::UMAX:
13269 case ISD::SMAX:
13271 case ISD::UMIN:
13273 case ISD::SMIN:
13275 case ISD::AND:
13277 case ISD::OR:
13279 case ISD::XOR:
13281 case ISD::FADD:
13283 case ISD::FMAXNUM:
13285 case ISD::FMINNUM:
13287 }
13288 };
13289
// True if V is lane 0 extracted from the matching RVV reduction node.
13290 auto IsReduction = [&BinOpToRVVReduce](SDValue V, unsigned Opc) {
13291 return V.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
13292 isNullConstant(V.getOperand(1)) &&
13293 V.getOperand(0).getOpcode() == BinOpToRVVReduce(Opc);
13294 };
13295
13296 unsigned Opc = N->getOpcode();
13297 unsigned ReduceIdx;
13298 if (IsReduction(N->getOperand(0), Opc))
13299 ReduceIdx = 0;
13300 else if (IsReduction(N->getOperand(1), Opc))
13301 ReduceIdx = 1;
13302 else
13303 return SDValue();
13304
13305 // Skip if FADD disallows reassociation but the combiner needs.
13306 if (Opc == ISD::FADD && !N->getFlags().hasAllowReassociation())
13307 return SDValue();
13308
13309 SDValue Extract = N->getOperand(ReduceIdx);
13310 SDValue Reduce = Extract.getOperand(0);
13311 if (!Extract.hasOneUse() || !Reduce.hasOneUse())
13312 return SDValue();
13313
13314 SDValue ScalarV = Reduce.getOperand(2);
13315 EVT ScalarVT = ScalarV.getValueType();
// Look through (insert_subvector undef, X, 0) to find the start value.
13316 if (ScalarV.getOpcode() == ISD::INSERT_SUBVECTOR &&
13317 ScalarV.getOperand(0)->isUndef() &&
13318 isNullConstant(ScalarV.getOperand(2)))
13319 ScalarV = ScalarV.getOperand(1);
13320
13321 // Make sure that ScalarV is a splat with VL=1.
13322 if (ScalarV.getOpcode() != RISCVISD::VFMV_S_F_VL &&
13323 ScalarV.getOpcode() != RISCVISD::VMV_S_X_VL &&
13324 ScalarV.getOpcode() != RISCVISD::VMV_V_X_VL)
13325 return SDValue();
13326
13327 if (!isNonZeroAVL(ScalarV.getOperand(2)))
13328 return SDValue();
13329
13330 // Check the scalar of ScalarV is neutral element
13331 // TODO: Deal with value other than neutral element.
13332 if (!isNeutralConstant(N->getOpcode(), N->getFlags(), ScalarV.getOperand(1),
13333 0))
13334 return SDValue();
13335
13336 // If the AVL is zero, operand 0 will be returned. So it's not safe to fold.
13337 // FIXME: We might be able to improve this if operand 0 is undef.
13338 if (!isNonZeroAVL(Reduce.getOperand(5)))
13339 return SDValue();
13340
13341 SDValue NewStart = N->getOperand(1 - ReduceIdx);
13342
13343 SDLoc DL(N);
13344 SDValue NewScalarV =
13345 lowerScalarInsert(NewStart, ScalarV.getOperand(2),
13346 ScalarV.getSimpleValueType(), DL, DAG, Subtarget);
13347
13348 // If we looked through an INSERT_SUBVECTOR we need to restore it.
13349 if (ScalarVT != ScalarV.getValueType())
13350 NewScalarV =
13351 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ScalarVT, DAG.getUNDEF(ScalarVT),
13352 NewScalarV, DAG.getVectorIdxConstant(0, DL));
13353
// Rebuild the reduction with the new start value, then re-extract lane 0.
13354 SDValue Ops[] = {Reduce.getOperand(0), Reduce.getOperand(1),
13355 NewScalarV, Reduce.getOperand(3),
13356 Reduce.getOperand(4), Reduce.getOperand(5)};
13357 SDValue NewReduce =
13358 DAG.getNode(Reduce.getOpcode(), DL, Reduce.getValueType(), Ops);
13359 return DAG.getNode(Extract.getOpcode(), DL, Extract.getValueType(), NewReduce,
13360 Extract.getOperand(1));
13361}
13362
13363 // Optimize (add (shl x, c0), (shl y, c1)) ->
13364 // (SLLI (SH*ADD x, y), c0), if c1-c0 equals to [1|2|3].
// NOTE(review): the signature line (orig. 13365) is missing from this
// extraction. The caller in performADDCombine below invokes this as
// transformAddShlImm(N, DAG, Subtarget) -- confirm against upstream.
13366 const RISCVSubtarget &Subtarget) {
13367 // Perform this optimization only in the zba extension.
13368 if (!Subtarget.hasStdExtZba())
13369 return SDValue();
13370
13371 // Skip for vector types and larger types.
13372 EVT VT = N->getValueType(0);
13373 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
13374 return SDValue();
13375
13376 // The two operand nodes must be SHL and have no other use.
13377 SDValue N0 = N->getOperand(0);
13378 SDValue N1 = N->getOperand(1);
13379 if (N0->getOpcode() != ISD::SHL || N1->getOpcode() != ISD::SHL ||
13380 !N0->hasOneUse() || !N1->hasOneUse())
13381 return SDValue();
13382
13383 // Check c0 and c1.
13384 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
13385 auto *N1C = dyn_cast<ConstantSDNode>(N1->getOperand(1));
13386 if (!N0C || !N1C)
13387 return SDValue();
13388 int64_t C0 = N0C->getSExtValue();
13389 int64_t C1 = N1C->getSExtValue();
13390 if (C0 <= 0 || C1 <= 0)
13391 return SDValue();
13392
13393 // Skip if SH1ADD/SH2ADD/SH3ADD are not applicable.
13394 int64_t Bits = std::min(C0, C1);
13395 int64_t Diff = std::abs(C0 - C1);
13396 if (Diff != 1 && Diff != 2 && Diff != 3)
13397 return SDValue();
13398
13399 // Build nodes.
// NS is the operand with the smaller shift (becomes the addend); NL is the
// one with the larger shift (gets shifted by Diff inside SH*ADD).
13400 SDLoc DL(N);
13401 SDValue NS = (C0 < C1) ? N0->getOperand(0) : N1->getOperand(0);
13402 SDValue NL = (C0 > C1) ? N0->getOperand(0) : N1->getOperand(0);
13403 SDValue SHADD = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, NL,
13404 DAG.getConstant(Diff, DL, VT), NS);
13405 return DAG.getNode(ISD::SHL, DL, VT, SHADD, DAG.getConstant(Bits, DL, VT));
13406}
13407
13408 // Combine a constant select operand into its use:
13409 //
13410 // (and (select cond, -1, c), x)
13411 // -> (select cond, x, (and x, c)) [AllOnes=1]
13412 // (or (select cond, 0, c), x)
13413 // -> (select cond, x, (or x, c)) [AllOnes=0]
13414 // (xor (select cond, 0, c), x)
13415 // -> (select cond, x, (xor x, c)) [AllOnes=0]
13416 // (add (select cond, 0, c), x)
13417 // -> (select cond, x, (add x, c)) [AllOnes=0]
13418 // (sub x, (select cond, 0, c))
13419 // -> (select cond, x, (sub x, c)) [AllOnes=0]
// NOTE(review): the first signature line (orig. 13420) is missing from this
// extraction. The body reads parameters Slct and OtherOp, and the caller in
// performSUBCombine invokes combineSelectAndUse(N, N1, N0, DAG, AllOnes,
// Subtarget), so the signature presumably starts
// "static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue
// OtherOp," -- confirm against upstream.
13421 SelectionDAG &DAG, bool AllOnes,
13422 const RISCVSubtarget &Subtarget) {
13423 EVT VT = N->getValueType(0);
13424
13425 // Skip vectors.
13426 if (VT.isVector())
13427 return SDValue();
13428
13429 if (!Subtarget.hasConditionalMoveFusion()) {
13430 // (select cond, x, (and x, c)) has custom lowering with Zicond.
13431 if ((!Subtarget.hasStdExtZicond() &&
13432 !Subtarget.hasVendorXVentanaCondOps()) ||
13433 N->getOpcode() != ISD::AND)
13434 return SDValue();
13435
13436 // Maybe harmful when condition code has multiple use.
13437 if (Slct.getOpcode() == ISD::SELECT && !Slct.getOperand(0).hasOneUse())
13438 return SDValue();
13439
13440 // Maybe harmful when VT is wider than XLen.
13441 if (VT.getSizeInBits() > Subtarget.getXLen())
13442 return SDValue();
13443 }
13444
13445 if ((Slct.getOpcode() != ISD::SELECT &&
13446 Slct.getOpcode() != RISCVISD::SELECT_CC) ||
13447 !Slct.hasOneUse())
13448 return SDValue();
13449
// NOTE(review): the lambda body (orig. 13451) is missing from this
// extraction; presumably it tests isAllOnesConstant/isNullConstant depending
// on AllOnes -- confirm against upstream.
13450 auto isZeroOrAllOnes = [](SDValue N, bool AllOnes) {
13452 };
13453
13454 bool SwapSelectOps;
// RISCVISD::SELECT_CC carries (lhs, rhs, cc) before its value operands.
13455 unsigned OpOffset = Slct.getOpcode() == RISCVISD::SELECT_CC ? 2 : 0;
13456 SDValue TrueVal = Slct.getOperand(1 + OpOffset);
13457 SDValue FalseVal = Slct.getOperand(2 + OpOffset);
13458 SDValue NonConstantVal;
13459 if (isZeroOrAllOnes(TrueVal, AllOnes)) {
13460 SwapSelectOps = false;
13461 NonConstantVal = FalseVal;
13462 } else if (isZeroOrAllOnes(FalseVal, AllOnes)) {
13463 SwapSelectOps = true;
13464 NonConstantVal = TrueVal;
13465 } else
13466 return SDValue();
13467
// Slct is now known to be the desired identity constant when CC is true.
13468 // Slct is now know to be the desired identity constant when CC is true.
13469 TrueVal = OtherOp;
13470 FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT, OtherOp, NonConstantVal);
13471 // Unless SwapSelectOps says the condition should be false.
13472 if (SwapSelectOps)
13473 std::swap(TrueVal, FalseVal);
13474
13475 if (Slct.getOpcode() == RISCVISD::SELECT_CC)
13476 return DAG.getNode(RISCVISD::SELECT_CC, SDLoc(N), VT,
13477 {Slct.getOperand(0), Slct.getOperand(1),
13478 Slct.getOperand(2), TrueVal, FalseVal});
13479
13480 return DAG.getNode(ISD::SELECT, SDLoc(N), VT,
13481 {Slct.getOperand(0), TrueVal, FalseVal});
13482}
13483
13484 // Attempt combineSelectAndUse on each operand of a commutative operator N.
// NOTE(review): the first signature line (orig. 13485) is missing from this
// extraction; the caller in performADDCombine invokes this as
// combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget), so it
// presumably starts "static SDValue combineSelectAndUseCommutative(SDNode *N,
// SelectionDAG &DAG," -- confirm against upstream.
13486 bool AllOnes,
13487 const RISCVSubtarget &Subtarget) {
13488 SDValue N0 = N->getOperand(0);
13489 SDValue N1 = N->getOperand(1);
// Try the select on either side, since N is commutative.
13490 if (SDValue Result = combineSelectAndUse(N, N0, N1, DAG, AllOnes, Subtarget))
13491 return Result;
13492 if (SDValue Result = combineSelectAndUse(N, N1, N0, DAG, AllOnes, Subtarget))
13493 return Result;
13494 return SDValue();
13495}
13496
13497 // Transform (add (mul x, c0), c1) ->
13498 // (add (mul (add x, c1/c0), c0), c1%c0).
13499 // if c1/c0 and c1%c0 are simm12, while c1 is not. A special corner case
13500 // that should be excluded is when c0*(c1/c0) is simm12, which will lead
13501 // to an infinite loop in DAGCombine if transformed.
13502 // Or transform (add (mul x, c0), c1) ->
13503 // (add (mul (add x, c1/c0+1), c0), c1%c0-c0),
13504 // if c1/c0+1 and c1%c0-c0 are simm12, while c1 is not. A special corner
13505 // case that should be excluded is when c0*(c1/c0+1) is simm12, which will
13506 // lead to an infinite loop in DAGCombine if transformed.
13507 // Or transform (add (mul x, c0), c1) ->
13508 // (add (mul (add x, c1/c0-1), c0), c1%c0+c0),
13509 // if c1/c0-1 and c1%c0+c0 are simm12, while c1 is not. A special corner
13510 // case that should be excluded is when c0*(c1/c0-1) is simm12, which will
13511 // lead to an infinite loop in DAGCombine if transformed.
13512 // Or transform (add (mul x, c0), c1) ->
13513 // (mul (add x, c1/c0), c0).
13514 // if c1%c0 is zero, and c1/c0 is simm12 while c1 is not.
// NOTE(review): the signature line (orig. 13515) is missing from this
// extraction. The caller in performADDCombine invokes this as
// transformAddImmMulImm(N, DAG, Subtarget) -- confirm against upstream.
13516 const RISCVSubtarget &Subtarget) {
13517 // Skip for vector types and larger types.
13518 EVT VT = N->getValueType(0);
13519 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
13520 return SDValue();
13521 // The first operand node must be a MUL and has no other use.
13522 SDValue N0 = N->getOperand(0);
13523 if (!N0->hasOneUse() || N0->getOpcode() != ISD::MUL)
13524 return SDValue();
13525 // Check if c0 and c1 match above conditions.
13526 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
13527 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
13528 if (!N0C || !N1C)
13529 return SDValue();
13530 // If N0C has multiple uses it's possible one of the cases in
13531 // DAGCombiner::isMulAddWithConstProfitable will be true, which would result
13532 // in an infinite loop.
13533 if (!N0C->hasOneUse())
13534 return SDValue();
13535 int64_t C0 = N0C->getSExtValue();
13536 int64_t C1 = N1C->getSExtValue();
13537 int64_t CA, CB;
// Bail out for degenerate multipliers, or when c1 already fits simm12.
13538 if (C0 == -1 || C0 == 0 || C0 == 1 || isInt<12>(C1))
13539 return SDValue();
13540 // Search for proper CA (non-zero) and CB that both are simm12.
13541 if ((C1 / C0) != 0 && isInt<12>(C1 / C0) && isInt<12>(C1 % C0) &&
13542 !isInt<12>(C0 * (C1 / C0))) {
13543 CA = C1 / C0;
13544 CB = C1 % C0;
13545 } else if ((C1 / C0 + 1) != 0 && isInt<12>(C1 / C0 + 1) &&
13546 isInt<12>(C1 % C0 - C0) && !isInt<12>(C0 * (C1 / C0 + 1))) {
13547 CA = C1 / C0 + 1;
13548 CB = C1 % C0 - C0;
13549 } else if ((C1 / C0 - 1) != 0 && isInt<12>(C1 / C0 - 1) &&
13550 isInt<12>(C1 % C0 + C0) && !isInt<12>(C0 * (C1 / C0 - 1))) {
13551 CA = C1 / C0 - 1;
13552 CB = C1 % C0 + C0;
13553 } else
13554 return SDValue();
13555 // Build new nodes (add (mul (add x, c1/c0), c0), c1%c0).
13556 SDLoc DL(N);
13557 SDValue New0 = DAG.getNode(ISD::ADD, DL, VT, N0->getOperand(0),
13558 DAG.getSignedConstant(CA, DL, VT));
13559 SDValue New1 =
13560 DAG.getNode(ISD::MUL, DL, VT, New0, DAG.getSignedConstant(C0, DL, VT));
13561 return DAG.getNode(ISD::ADD, DL, VT, New1, DAG.getSignedConstant(CB, DL, VT));
13562}
13563
13564 // add (zext, zext) -> zext (add (zext, zext))
13565 // sub (zext, zext) -> sext (sub (zext, zext))
13566 // mul (zext, zext) -> zext (mul (zext, zext))
13567 // sdiv (zext, zext) -> zext (sdiv (zext, zext))
13568 // udiv (zext, zext) -> zext (udiv (zext, zext))
13569 // srem (zext, zext) -> zext (srem (zext, zext))
13570 // urem (zext, zext) -> zext (urem (zext, zext))
13571 //
// where the sum of the extend widths match, and the range of the bin op
13572 // where the sum of the extend widths match, and the the range of the bin op
13573 // fits inside the width of the narrower bin op. (For profitability on rvv, we
13574 // use a power of two for both inner and outer extend.)
// NOTE(review): the signature line (orig. 13575) is missing from this
// extraction. Callers in performADDCombine/performSUBCombine invoke this as
// combineBinOpOfZExt(N, DAG) -- confirm against upstream. Several other body
// lines are also missing; see the NOTE markers below.
13576
13577 EVT VT = N->getValueType(0);
13578 if (!VT.isVector() || !DAG.getTargetLoweringInfo().isTypeLegal(VT))
13579 return SDValue();
13580
13581 SDValue N0 = N->getOperand(0);
13582 SDValue N1 = N->getOperand(1);
// NOTE(review): the check that both operands are zero-extends (orig. 13583)
// is missing from this extraction; only its early-return is visible below.
13584 return SDValue();
13585 if (!N0.hasOneUse() || !N1.hasOneUse())
13586 return SDValue();
13587
13588 SDValue Src0 = N0.getOperand(0);
13589 SDValue Src1 = N1.getOperand(0);
13590 EVT SrcVT = Src0.getValueType();
13591 if (!DAG.getTargetLoweringInfo().isTypeLegal(SrcVT) ||
13592 SrcVT != Src1.getValueType() || SrcVT.getScalarSizeInBits() < 8 ||
13593 SrcVT.getScalarSizeInBits() >= VT.getScalarSizeInBits() / 2)
13594 return SDValue();
13595
13596 LLVMContext &C = *DAG.getContext();
// NOTE(review): the definition of ElemVT (orig. 13597) is missing from this
// extraction -- confirm against upstream.
13598 EVT NarrowVT = EVT::getVectorVT(C, ElemVT, VT.getVectorElementCount());
13599
13600 Src0 = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Src0), NarrowVT, Src0);
13601 Src1 = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Src1), NarrowVT, Src1);
13602
13603 // Src0 and Src1 are zero extended, so they're always positive if signed.
13604 //
13605 // sub can produce a negative from two positive operands, so it needs sign
13606 // extended. Other nodes produce a positive from two positive operands, so
13607 // zero extend instead.
// NOTE(review): the ternary choosing SIGN_EXTEND vs ZERO_EXTEND (orig. 13609)
// is missing from this extraction -- confirm against upstream.
13608 unsigned OuterExtend =
13610
13611 return DAG.getNode(
13612 OuterExtend, SDLoc(N), VT,
13613 DAG.getNode(N->getOpcode(), SDLoc(N), NarrowVT, Src0, Src1));
13614}
13615
13616 // Try to turn (add (xor bool, 1) -1) into (neg bool).
// NOTE(review): the signature line (orig. 13617) is missing from this
// extraction. The caller in performADDCombine invokes this as
// combineAddOfBooleanXor(N, DAG) -- confirm against upstream.
13618 SDValue N0 = N->getOperand(0);
13619 SDValue N1 = N->getOperand(1);
13620 EVT VT = N->getValueType(0);
13621 SDLoc DL(N);
13622
13623 // RHS should be -1.
13624 if (!isAllOnesConstant(N1))
13625 return SDValue();
13626
13627 // Look for (xor X, 1).
13628 if (N0.getOpcode() != ISD::XOR || !isOneConstant(N0.getOperand(1)))
13629 return SDValue();
13630
13631 // First xor input should be 0 or 1.
// NOTE(review): the definition of Mask (orig. 13632, presumably an APInt
// covering all bits above bit 0) is missing from this extraction -- confirm
// against upstream.
13633 if (!DAG.MaskedValueIsZero(N0.getOperand(0), Mask))
13634 return SDValue();
13635
13636 // Emit a negate of the setcc.
13637 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
13638 N0.getOperand(0));
13639}
13640
// NOTE(review): the signature lines (orig. 13641-13642) are missing from this
// extraction. The body reads DCI (a TargetLowering::DAGCombinerInfo) and N,
// and this is presumably the ADD DAG-combine entry point (performADDCombine)
// -- confirm against upstream.
13643 const RISCVSubtarget &Subtarget) {
13644 SelectionDAG &DAG = DCI.DAG;
// Try each ADD-specific combine in turn; first success wins.
13645 if (SDValue V = combineAddOfBooleanXor(N, DAG))
13646 return V;
13647 if (SDValue V = transformAddImmMulImm(N, DAG, Subtarget))
13648 return V;
13649 if (!DCI.isBeforeLegalize() && !DCI.isCalledByLegalizer())
13650 if (SDValue V = transformAddShlImm(N, DAG, Subtarget))
13651 return V;
13652 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
13653 return V;
13654 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
13655 return V;
13656 if (SDValue V = combineBinOpOfZExt(N, DAG))
13657 return V;
13658
13659 // fold (add (select lhs, rhs, cc, 0, y), x) ->
13660 // (select lhs, rhs, cc, x, (add x, y))
13661 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
13662}
13663
13664 // Try to turn a sub boolean RHS and constant LHS into an addi.
// NOTE(review): the signature line (orig. 13665) is missing from this
// extraction. The caller in performSUBCombine invokes this as
// combineSubOfBoolean(N, DAG) -- confirm against upstream.
13666 SDValue N0 = N->getOperand(0);
13667 SDValue N1 = N->getOperand(1);
13668 EVT VT = N->getValueType(0);
13669 SDLoc DL(N);
13670
13671 // Require a constant LHS.
13672 auto *N0C = dyn_cast<ConstantSDNode>(N0);
13673 if (!N0C)
13674 return SDValue();
13675
13676 // All our optimizations involve subtracting 1 from the immediate and forming
13677 // an ADDI. Make sure the new immediate is valid for an ADDI.
13678 APInt ImmValMinus1 = N0C->getAPIntValue() - 1;
13679 if (!ImmValMinus1.isSignedIntN(12))
13680 return SDValue();
13681
13682 SDValue NewLHS;
13683 if (N1.getOpcode() == ISD::SETCC && N1.hasOneUse()) {
13684 // (sub constant, (setcc x, y, eq/neq)) ->
13685 // (add (setcc x, y, neq/eq), constant - 1)
13686 ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
13687 EVT SetCCOpVT = N1.getOperand(0).getValueType();
13688 if (!isIntEqualitySetCC(CCVal) || !SetCCOpVT.isInteger())
13689 return SDValue();
13690 CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
13691 NewLHS =
13692 DAG.getSetCC(SDLoc(N1), VT, N1.getOperand(0), N1.getOperand(1), CCVal);
13693 } else if (N1.getOpcode() == ISD::XOR && isOneConstant(N1.getOperand(1)) &&
13694 N1.getOperand(0).getOpcode() == ISD::SETCC) {
13695 // (sub C, (xor (setcc), 1)) -> (add (setcc), C-1).
13696 // Since setcc returns a bool the xor is equivalent to 1-setcc.
13697 NewLHS = N1.getOperand(0);
13698 } else
13699 return SDValue();
13700
13701 SDValue NewRHS = DAG.getConstant(ImmValMinus1, DL, VT);
13702 return DAG.getNode(ISD::ADD, DL, VT, NewLHS, NewRHS);
13703}
13704
13705 // Looks for (sub (shl X, 8), X) where only bits 8, 16, 24, 32, etc. of X are
13706 // non-zero. Replace with orc.b.
// NOTE(review): the signature line (orig. 13707) is missing from this
// extraction. The caller in performSUBCombine invokes this as
// combineSubShiftToOrcB(N, DAG, Subtarget) -- confirm against upstream.
13708 const RISCVSubtarget &Subtarget) {
13709 if (!Subtarget.hasStdExtZbb())
13710 return SDValue();
13711
13712 EVT VT = N->getValueType(0);
13713
13714 if (VT != Subtarget.getXLenVT() && VT != MVT::i32 && VT != MVT::i16)
13715 return SDValue();
13716
13717 SDValue N0 = N->getOperand(0);
13718 SDValue N1 = N->getOperand(1);
13719
// Match (sub (shl X, C), X) with the shl having no other users.
13720 if (N0.getOpcode() != ISD::SHL || N0.getOperand(0) != N1 || !N0.hasOneUse())
13721 return SDValue();
13722
13723 auto *ShAmtC = dyn_cast<ConstantSDNode>(N0.getOperand(1));
13724 if (!ShAmtC || ShAmtC->getZExtValue() != 8)
13725 return SDValue();
13726
// Require all bits except the lowest of each byte to be known zero, i.e.
// each byte of X is 0 or 1, matching orc.b semantics.
13727 APInt Mask = APInt::getSplat(VT.getSizeInBits(), APInt(8, 0xfe));
13728 if (!DAG.MaskedValueIsZero(N1, Mask))
13729 return SDValue();
13730
13731 return DAG.getNode(RISCVISD::ORC_B, SDLoc(N), VT, N1);
13732}
13733
// NOTE(review): the signature line (orig. 13734) is missing from this
// extraction. This is presumably the SUB DAG-combine entry point
// (performSUBCombine(SDNode *N, SelectionDAG &DAG, ...)) -- confirm against
// upstream.
13735 const RISCVSubtarget &Subtarget) {
13736 if (SDValue V = combineSubOfBoolean(N, DAG))
13737 return V;
13738
13739 EVT VT = N->getValueType(0);
13740 SDValue N0 = N->getOperand(0);
13741 SDValue N1 = N->getOperand(1);
13742 // fold (sub 0, (setcc x, 0, setlt)) -> (sra x, xlen - 1)
13743 if (isNullConstant(N0) && N1.getOpcode() == ISD::SETCC && N1.hasOneUse() &&
13744 isNullConstant(N1.getOperand(1))) {
13745 ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
13746 if (CCVal == ISD::SETLT) {
13747 SDLoc DL(N);
13748 unsigned ShAmt = N0.getValueSizeInBits() - 1;
13749 return DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0),
13750 DAG.getConstant(ShAmt, DL, VT));
13751 }
13752 }
13753
13754 if (SDValue V = combineBinOpOfZExt(N, DAG))
13755 return V;
13756 if (SDValue V = combineSubShiftToOrcB(N, DAG, Subtarget))
13757 return V;
13758
13759 // fold (sub x, (select lhs, rhs, cc, 0, y)) ->
13760 // (select lhs, rhs, cc, x, (sub x, y))
13761 return combineSelectAndUse(N, N1, N0, DAG, /*AllOnes*/ false, Subtarget);
13762}
13763
13764 // Apply DeMorgan's law to (and/or (xor X, 1), (xor Y, 1)) if X and Y are 0/1.
13765 // Legalizing setcc can introduce xors like this. Doing this transform reduces
13766 // the number of xors and may allow the xor to fold into a branch condition.
// NOTE(review): the signature line (orig. 13767) is missing from this
// extraction; presumably this is combineDeMorganOfBoolean(SDNode *N,
// SelectionDAG &DAG) -- confirm against upstream.
13768 SDValue N0 = N->getOperand(0);
13769 SDValue N1 = N->getOperand(1);
13770 bool IsAnd = N->getOpcode() == ISD::AND;
13771
13772 if (N0.getOpcode() != ISD::XOR || N1.getOpcode() != ISD::XOR)
13773 return SDValue();
13774
13775 if (!N0.hasOneUse() || !N1.hasOneUse())
13776 return SDValue();
13777
13778 SDValue N01 = N0.getOperand(1);
13779 SDValue N11 = N1.getOperand(1);
13780
13781 // For AND, SimplifyDemandedBits may have turned one of the (xor X, 1) into
13782 // (xor X, -1) based on the upper bits of the other operand being 0. If the
13783 // operation is And, allow one of the Xors to use -1.
13784 if (isOneConstant(N01)) {
13785 if (!isOneConstant(N11) && !(IsAnd && isAllOnesConstant(N11)))
13786 return SDValue();
13787 } else if (isOneConstant(N11)) {
13788 // N01 and N11 being 1 was already handled. Handle N11==1 and N01==-1.
13789 if (!(IsAnd && isAllOnesConstant(N01)))
13790 return SDValue();
13791 } else
13792 return SDValue();
13793
13794 EVT VT = N->getValueType(0);
13795
13796 SDValue N00 = N0.getOperand(0);
13797 SDValue N10 = N1.getOperand(0);
13798
13799 // The LHS of the xors needs to be 0/1.
// NOTE(review): the definition of Mask (orig. 13800, presumably an APInt
// covering all bits above bit 0) is missing from this extraction -- confirm
// against upstream.
13801 if (!DAG.MaskedValueIsZero(N00, Mask) || !DAG.MaskedValueIsZero(N10, Mask))
13802 return SDValue();
13803
13804 // Invert the opcode and insert a new xor.
13805 SDLoc DL(N);
13806 unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
13807 SDValue Logic = DAG.getNode(Opc, DL, VT, N00, N10);
13808 return DAG.getNode(ISD::XOR, DL, VT, Logic, DAG.getConstant(1, DL, VT));
13809}
13810
13811 // Fold (vXi8 (trunc (vselect (setltu, X, 256), X, (sext (setgt X, 0))))) to
13812 // (vXi8 (trunc (smin (smax X, 0), 255))). This represents saturating a signed
13813 // value to an unsigned value. This will be lowered to vmax and series of
13814 // vnclipu instructions later. This can be extended to other truncated types
13815 // other than i8 by replacing 256 and 255 with the equivalent constants for the
13816 // type.
// NOTE(review): the signature line (orig. 13817) is missing from this
// extraction; no caller is visible in this chunk, so the exact name/signature
// must be confirmed against upstream (it takes an SDNode *N and a
// SelectionDAG &DAG based on the body).
13818 EVT VT = N->getValueType(0);
13819 SDValue N0 = N->getOperand(0);
13820 EVT SrcVT = N0.getValueType();
13821
13822 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13823 if (!VT.isVector() || !TLI.isTypeLegal(VT) || !TLI.isTypeLegal(SrcVT))
13824 return SDValue();
13825
13826 if (N0.getOpcode() != ISD::VSELECT || !N0.hasOneUse())
13827 return SDValue();
13828
13829 SDValue Cond = N0.getOperand(0);
13830 SDValue True = N0.getOperand(1);
13831 SDValue False = N0.getOperand(2);
13832
13833 if (Cond.getOpcode() != ISD::SETCC)
13834 return SDValue();
13835
13836 // FIXME: Support the version of this pattern with the select operands
13837 // swapped.
13838 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
13839 if (CCVal != ISD::SETULT)
13840 return SDValue();
13841
13842 SDValue CondLHS = Cond.getOperand(0);
13843 SDValue CondRHS = Cond.getOperand(1);
13844
13845 if (CondLHS != True)
13846 return SDValue();
13847
13848 unsigned ScalarBits = VT.getScalarSizeInBits();
13849
// The compare bound must be exactly 2^(dest scalar width), e.g. 256 for i8.
13850 // FIXME: Support other constants.
13851 ConstantSDNode *CondRHSC = isConstOrConstSplat(CondRHS);
13852 if (!CondRHSC || CondRHSC->getAPIntValue() != (1ULL << ScalarBits))
13853 return SDValue();
13854
13855 if (False.getOpcode() != ISD::SIGN_EXTEND)
13856 return SDValue();
13857
13858 False = False.getOperand(0);
13859
13860 if (False.getOpcode() != ISD::SETCC || False.getOperand(0) != True)
13861 return SDValue();
13862
13863 ConstantSDNode *FalseRHSC = isConstOrConstSplat(False.getOperand(1));
13864 if (!FalseRHSC || !FalseRHSC->isZero())
13865 return SDValue();
13866
13867 ISD::CondCode CCVal2 = cast<CondCodeSDNode>(False.getOperand(2))->get();
13868 if (CCVal2 != ISD::SETGT)
13869 return SDValue();
13870
13871 // Emit the signed to unsigned saturation pattern.
13872 SDLoc DL(N);
13873 SDValue Max =
13874 DAG.getNode(ISD::SMAX, DL, SrcVT, True, DAG.getConstant(0, DL, SrcVT));
13875 SDValue Min =
13876 DAG.getNode(ISD::SMIN, DL, SrcVT, Max,
13877 DAG.getConstant((1ULL << ScalarBits) - 1, DL, SrcVT));
13878 return DAG.getNode(ISD::TRUNCATE, DL, VT, Min);
13879}
13880
// NOTE(review): the first signature line (original 13881) was lost in this
// extraction — presumably
// static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG, ...);
// confirm against upstream.
13882 const RISCVSubtarget &Subtarget) {
13883 SDValue N0 = N->getOperand(0);
13884 EVT VT = N->getValueType(0);
13885
13886 // Pre-promote (i1 (truncate (srl X, Y))) on RV64 with Zbs without zero
13887 // extending X. This is safe since we only need the LSB after the shift and
13888 // shift amounts larger than 31 would produce poison. If we wait until
13889 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
13890 // to use a BEXT instruction.
// Only variable shift amounts matter: a constant shift would be handled by
// other patterns, and BEXT's value is in the variable-amount case.
13891 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() && VT == MVT::i1 &&
13892 N0.getValueType() == MVT::i32 && N0.getOpcode() == ISD::SRL &&
13893 !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
13894 SDLoc DL(N0);
// Widen to i64: any-extend the value (only the LSB survives), zero-extend
// the shift amount so its value is preserved.
13895 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
13896 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
13897 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
13898 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Srl);
13899 }
13900
// Otherwise try the signed-to-unsigned saturation pattern above.
13901 return combineTruncSelectToSMaxUSat(N, DAG);
13902}
13903
13904// Combines two comparison operation and logic operation to one selection
13905// operation(min, max) and logic operation. Returns new constructed Node if
13906// conditions for optimization are satisfied.
// NOTE(review): the signature lines (original 13907-13908) were lost in this
// extraction — from the DCI/Subtarget usage this is presumably
// performANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
//                   const RISCVSubtarget &Subtarget); confirm upstream.
13909 const RISCVSubtarget &Subtarget) {
13910 SelectionDAG &DAG = DCI.DAG;
13911
13912 SDValue N0 = N->getOperand(0);
13913 // Pre-promote (i32 (and (srl X, Y), 1)) on RV64 with Zbs without zero
13914 // extending X. This is safe since we only need the LSB after the shift and
13915 // shift amounts larger than 31 would produce poison. If we wait until
13916 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
13917 // to use a BEXT instruction.
13918 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
13919 N->getValueType(0) == MVT::i32 && isOneConstant(N->getOperand(1)) &&
13920 N0.getOpcode() == ISD::SRL && !isa<ConstantSDNode>(N0.getOperand(1)) &&
13921 N0.hasOneUse()) {
13922 SDLoc DL(N);
// Rebuild (and (srl X, Y), 1) in i64 so it matches BEXT after selection.
13923 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
13924 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
13925 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
13926 SDValue And = DAG.getNode(ISD::AND, DL, MVT::i64, Srl,
13927 DAG.getConstant(1, DL, MVT::i64));
13928 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
13929 }
13930
// Shared binop combines: fold into vector reductions where possible.
13931 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
13932 return V;
13933 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
13934 return V;
13935
// De Morgan rewrite only after legalization, when boolean contents are known.
13936 if (DCI.isAfterLegalizeDAG())
13937 if (SDValue V = combineDeMorganOfBoolean(N, DAG))
13938 return V;
13939
13940 // fold (and (select lhs, rhs, cc, -1, y), x) ->
13941 // (select lhs, rhs, cc, x, (and x, y))
13942 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ true, Subtarget);
13943}
13944
13945// Try to pull an xor with 1 through a select idiom that uses czero_eqz/nez.
13946// FIXME: Generalize to other binary operators with same operand.
// NOTE(review): the first signature line (original 13947) was lost in this
// extraction — per the call sites this is
// combineOrOfCZERO(SDNode *N, SDValue N0, SDValue N1, SelectionDAG &DAG);
// confirm against upstream.
13948 SelectionDAG &DAG) {
13949 assert(N->getOpcode() == ISD::OR && "Unexpected opcode");
13950
// NOTE(review): one condition line (original 13952, presumably checking N1
// is CZERO_NEZ) is missing from this extraction.
13951 if (N0.getOpcode() != RISCVISD::CZERO_EQZ ||
13953 !N0.hasOneUse() || !N1.hasOneUse())
13954 return SDValue();
13955
13956 // Should have the same condition.
13957 SDValue Cond = N0.getOperand(1);
13958 if (Cond != N1.getOperand(1))
13959 return SDValue();
13960
13961 SDValue TrueV = N0.getOperand(0);
13962 SDValue FalseV = N1.getOperand(0);
13963
// Both arms must be (xor V, 1) with the same constant so the xor can be
// hoisted out of the select idiom.
13964 if (TrueV.getOpcode() != ISD::XOR || FalseV.getOpcode() != ISD::XOR ||
13965 TrueV.getOperand(1) != FalseV.getOperand(1) ||
13966 !isOneConstant(TrueV.getOperand(1)) ||
13967 !TrueV.hasOneUse() || !FalseV.hasOneUse())
13968 return SDValue();
13969
13970 EVT VT = N->getValueType(0);
13971 SDLoc DL(N);
13972
// Rebuild the czero pair on the un-xored values, then apply a single xor
// to the combined result: (a^1 ? : ) | (b^1 ? : ) == ((a ? : ) | (b ? : )) ^ 1.
13973 SDValue NewN0 = DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV.getOperand(0),
13974 Cond);
13975 SDValue NewN1 = DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV.getOperand(0),
13976 Cond);
13977 SDValue NewOr = DAG.getNode(ISD::OR, DL, VT, NewN0, NewN1);
13978 return DAG.getNode(ISD::XOR, DL, VT, NewOr, TrueV.getOperand(1));
13979}
13980
// NOTE(review): the signature line (original 13981) was lost in this
// extraction — presumably performORCombine(SDNode *N,
// TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget);
// confirm against upstream.
13982 const RISCVSubtarget &Subtarget) {
13983 SelectionDAG &DAG = DCI.DAG;
13984
// Shared binop combines: fold into vector reductions where possible.
13985 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
13986 return V;
13987 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
13988 return V;
13989
// De Morgan rewrite only after legalization, when boolean contents are known.
13990 if (DCI.isAfterLegalizeDAG())
13991 if (SDValue V = combineDeMorganOfBoolean(N, DAG))
13992 return V;
13993
13994 // Look for Or of CZERO_EQZ/NEZ with same condition which is the select idiom.
13995 // We may be able to pull a common operation out of the true and false value.
13996 SDValue N0 = N->getOperand(0);
13997 SDValue N1 = N->getOperand(1);
// Try both operand orders since OR is commutative.
13998 if (SDValue V = combineOrOfCZERO(N, N0, N1, DAG))
13999 return V;
14000 if (SDValue V = combineOrOfCZERO(N, N1, N0, DAG))
14001 return V;
14002
14003 // fold (or (select cond, 0, y), x) ->
14004 // (select cond, x, (or x, y))
14005 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
14006}
14007
// NOTE(review): the signature line (original 14008) was lost in this
// extraction — presumably performXORCombine(SDNode *N, SelectionDAG &DAG,
// const RISCVSubtarget &Subtarget); confirm against upstream.
14009 const RISCVSubtarget &Subtarget) {
14010 SDValue N0 = N->getOperand(0);
14011 SDValue N1 = N->getOperand(1);
14012
14013 // Pre-promote (i32 (xor (shl -1, X), ~0)) on RV64 with Zbs so we can use
14014 // (ADDI (BSET X0, X), -1). If we wait until type legalization, we'll create
14015 // RISCVISD::SLLW and we can't recover it to use a BSET instruction.
14016 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
14017 N->getValueType(0) == MVT::i32 && isAllOnesConstant(N1) &&
14018 N0.getOpcode() == ISD::SHL && isAllOnesConstant(N0.getOperand(0)) &&
14019 !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
14020 SDLoc DL(N);
// Rebuild the not-of-shift in i64 so it can become BSET + ADDI.
14021 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
14022 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
14023 SDValue Shl = DAG.getNode(ISD::SHL, DL, MVT::i64, Op0, Op1);
14024 SDValue And = DAG.getNOT(DL, Shl, MVT::i64);
14025 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
14026 }
14027
14028 // fold (xor (sllw 1, x), -1) -> (rolw ~1, x)
14029 // NOTE: Assumes ROL being legal means ROLW is legal.
// NOTE(review): one condition line (original 14032, presumably checking the
// shifted value is the constant 1 and N1 is all-ones) is missing from this
// extraction.
14030 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14031 if (N0.getOpcode() == RISCVISD::SLLW &&
14033 TLI.isOperationLegal(ISD::ROTL, MVT::i64)) {
14034 SDLoc DL(N);
14035 return DAG.getNode(RISCVISD::ROLW, DL, MVT::i64,
14036 DAG.getConstant(~1, DL, MVT::i64), N0.getOperand(1));
14037 }
14038
14039 // Fold (xor (setcc constant, y, setlt), 1) -> (setcc y, constant + 1, setlt)
// NOTE(review): one line (original 14042, presumably extracting the setcc
// condition code into CC) is missing from this extraction.
14040 if (N0.getOpcode() == ISD::SETCC && isOneConstant(N1) && N0.hasOneUse()) {
14041 auto *ConstN00 = dyn_cast<ConstantSDNode>(N0.getOperand(0));
14043 if (ConstN00 && CC == ISD::SETLT) {
14044 EVT VT = N0.getValueType();
14045 SDLoc DL(N0);
14046 const APInt &Imm = ConstN00->getAPIntValue();
// Only profitable if the incremented constant still fits a 12-bit simm.
14047 if ((Imm + 1).isSignedIntN(12))
14048 return DAG.getSetCC(DL, VT, N0.getOperand(1),
14049 DAG.getConstant(Imm + 1, DL, VT), CC);
14050 }
14051 }
14052
// Shared binop combines: fold into vector reductions where possible.
14053 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
14054 return V;
14055 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
14056 return V;
14057
14058 // fold (xor (select cond, 0, y), x) ->
14059 // (select cond, x, (xor x, y))
14060 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
14061}
14062
14063// Try to expand a scalar multiply to a faster sequence.
// NOTE(review): the signature lines (original 14064-14065) were lost in this
// extraction — per the call site this is
// expandMul(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo
//           &DCI, const RISCVSubtarget &Subtarget); confirm upstream.
14066 const RISCVSubtarget &Subtarget) {
14067
14068 EVT VT = N->getValueType(0);
14069
14070 // LI + MUL is usually smaller than the alternative sequence.
// NOTE(review): the condition line guarded by this early return (original
// 14071, presumably an optsize/minsize check) is missing from this
// extraction.
14072 return SDValue();
14073
// Run only in the main combine phases; the legalizer must not recurse here.
14074 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
14075 return SDValue();
14076
// Only expand native-width (XLEN) scalar multiplies.
14077 if (VT != Subtarget.getXLenVT())
14078 return SDValue();
14079
// shXadd-style expansions need Zba (sh1add/sh2add/sh3add) or the
// equivalent T-Head extension.
14080 const bool HasShlAdd =
14081 Subtarget.hasStdExtZba() || Subtarget.hasVendorXTHeadBa();
14082
14083 ConstantSDNode *CNode = dyn_cast<ConstantSDNode>(N->getOperand(1));
14084 if (!CNode)
14085 return SDValue();
14086 uint64_t MulAmt = CNode->getZExtValue();
14087
14088 // WARNING: The code below is knowingly incorrect with regards to undef semantics.
14089 // We're adding additional uses of X here, and in principle, we should be freezing
14090 // X before doing so. However, adding freeze here causes real regressions, and no
14091 // other target properly freezes X in these cases either.
14092 SDValue X = N->getOperand(0);
14093
14094 if (HasShlAdd) {
14095 for (uint64_t Divisor : {3, 5, 9}) {
14096 if (MulAmt % Divisor != 0)
14097 continue;
14098 uint64_t MulAmt2 = MulAmt / Divisor;
14099 // 3/5/9 * 2^N -> shl (shXadd X, X), N
14100 if (isPowerOf2_64(MulAmt2)) {
14101 SDLoc DL(N);
// NOTE(review): this local X shadows the outer X declared above (both are
// N->getOperand(0), so behavior is identical, but the shadowing is a smell
// worth cleaning up upstream).
14102 SDValue X = N->getOperand(0);
14103 // Put the shift first if we can fold a zext into the
14104 // shift forming a slli.uw.
14105 if (X.getOpcode() == ISD::AND && isa<ConstantSDNode>(X.getOperand(1)) &&
14106 X.getConstantOperandVal(1) == UINT64_C(0xffffffff)) {
14107 SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, X,
14108 DAG.getConstant(Log2_64(MulAmt2), DL, VT));
14109 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Shl,
14110 DAG.getConstant(Log2_64(Divisor - 1), DL, VT),
14111 Shl);
14112 }
14113 // Otherwise, put the shl second so that it can fold with following
14114 // instructions (e.g. sext or add).
14115 SDValue Mul359 =
14116 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
14117 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
14118 return DAG.getNode(ISD::SHL, DL, VT, Mul359,
14119 DAG.getConstant(Log2_64(MulAmt2), DL, VT));
14120 }
14121
14122 // 3/5/9 * 3/5/9 -> shXadd (shYadd X, X), (shYadd X, X)
14123 if (MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9) {
14124 SDLoc DL(N);
14125 SDValue Mul359 =
14126 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
14127 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
14128 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359,
14129 DAG.getConstant(Log2_64(MulAmt2 - 1), DL, VT),
14130 Mul359);
14131 }
14132 }
14133
14134 // If this is a power 2 + 2/4/8, we can use a shift followed by a single
14135 // shXadd. First check if this a sum of two power of 2s because that's
14136 // easy. Then count how many zeros are up to the first bit.
// MulAmt & (MulAmt - 1) clears the lowest set bit; if the remainder is a
// power of two, MulAmt had exactly two set bits.
14137 if (isPowerOf2_64(MulAmt & (MulAmt - 1))) {
14138 unsigned ScaleShift = llvm::countr_zero(MulAmt);
14139 if (ScaleShift >= 1 && ScaleShift < 4) {
14140 unsigned ShiftAmt = Log2_64((MulAmt & (MulAmt - 1)));
14141 SDLoc DL(N);
14142 SDValue Shift1 =
14143 DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
14144 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
14145 DAG.getConstant(ScaleShift, DL, VT), Shift1);
14146 }
14147 }
14148
14149 // 2^(1,2,3) * 3,5,9 + 1 -> (shXadd (shYadd x, x), x)
14150 // This is the two instruction form, there are also three instruction
14151 // variants we could implement. e.g.
14152 // (2^(1,2,3) * 3,5,9 + 1) << C2
14153 // 2^(C1>3) * 3,5,9 +/- 1
14154 for (uint64_t Divisor : {3, 5, 9}) {
14155 uint64_t C = MulAmt - 1;
14156 if (C <= Divisor)
14157 continue;
14158 unsigned TZ = llvm::countr_zero(C);
14159 if ((C >> TZ) == Divisor && (TZ == 1 || TZ == 2 || TZ == 3)) {
14160 SDLoc DL(N);
14161 SDValue Mul359 =
14162 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
14163 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
14164 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359,
14165 DAG.getConstant(TZ, DL, VT), X);
14166 }
14167 }
14168
14169 // 2^n + 2/4/8 + 1 -> (add (shl X, C1), (shXadd X, X))
14170 if (MulAmt > 2 && isPowerOf2_64((MulAmt - 1) & (MulAmt - 2))) {
14171 unsigned ScaleShift = llvm::countr_zero(MulAmt - 1);
14172 if (ScaleShift >= 1 && ScaleShift < 4) {
14173 unsigned ShiftAmt = Log2_64(((MulAmt - 1) & (MulAmt - 2)));
14174 SDLoc DL(N);
14175 SDValue Shift1 =
14176 DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
14177 return DAG.getNode(ISD::ADD, DL, VT, Shift1,
14178 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
14179 DAG.getConstant(ScaleShift, DL, VT), X));
14180 }
14181 }
14182
14183 // 2^N - 3/5/9 --> (sub (shl X, C1), (shXadd X, x))
14184 for (uint64_t Offset : {3, 5, 9}) {
14185 if (isPowerOf2_64(MulAmt + Offset)) {
14186 SDLoc DL(N);
14187 SDValue Shift1 =
14188 DAG.getNode(ISD::SHL, DL, VT, X,
14189 DAG.getConstant(Log2_64(MulAmt + Offset), DL, VT));
14190 SDValue Mul359 =
14191 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
14192 DAG.getConstant(Log2_64(Offset - 1), DL, VT), X);
14193 return DAG.getNode(ISD::SUB, DL, VT, Shift1, Mul359);
14194 }
14195 }
14196 }
14197
14198 // 2^N - 2^M -> (sub (shl X, C1), (shl X, C2))
// MulAmt & -MulAmt isolates the lowest set bit.
14199 uint64_t MulAmtLowBit = MulAmt & (-MulAmt);
14200 if (isPowerOf2_64(MulAmt + MulAmtLowBit)) {
14201 uint64_t ShiftAmt1 = MulAmt + MulAmtLowBit;
14202 SDLoc DL(N);
14203 SDValue Shift1 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
14204 DAG.getConstant(Log2_64(ShiftAmt1), DL, VT));
14205 SDValue Shift2 =
14206 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
14207 DAG.getConstant(Log2_64(MulAmtLowBit), DL, VT));
14208 return DAG.getNode(ISD::SUB, DL, VT, Shift1, Shift2);
14209 }
14210
// Three-instruction shXadd forms, tried after the cheaper patterns above.
14211 if (HasShlAdd) {
14212 for (uint64_t Divisor : {3, 5, 9}) {
14213 if (MulAmt % Divisor != 0)
14214 continue;
14215 uint64_t MulAmt2 = MulAmt / Divisor;
14216 // 3/5/9 * 3/5/9 * 2^N - In particular, this covers multiples
14217 // of 25 which happen to be quite common.
14218 for (uint64_t Divisor2 : {3, 5, 9}) {
14219 if (MulAmt2 % Divisor2 != 0)
14220 continue;
14221 uint64_t MulAmt3 = MulAmt2 / Divisor2;
14222 if (isPowerOf2_64(MulAmt3)) {
14223 SDLoc DL(N);
14224 SDValue Mul359A =
14225 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
14226 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
14227 SDValue Mul359B = DAG.getNode(
14228 RISCVISD::SHL_ADD, DL, VT, Mul359A,
14229 DAG.getConstant(Log2_64(Divisor2 - 1), DL, VT), Mul359A);
14230 return DAG.getNode(ISD::SHL, DL, VT, Mul359B,
14231 DAG.getConstant(Log2_64(MulAmt3), DL, VT));
14232 }
14233 }
14234 }
14235 }
14236
14237 return SDValue();
14238}
14239
14240// Combine vXi32 (mul (and (lshr X, 15), 0x10001), 0xffff) ->
14241// (bitcast (sra (v2Xi16 (bitcast X)), 15))
14242// Same for other equivalent types with other equivalent constants.
// NOTE(review): the signature line (original 14243) was lost in this
// extraction — presumably a static combine taking (SDNode *N,
// SelectionDAG &DAG); confirm against upstream.
14244 EVT VT = N->getValueType(0);
14245 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14246
14247 // Do this for legal vectors unless they are i1 or i8 vectors.
14248 if (!VT.isVector() || !TLI.isTypeLegal(VT) || VT.getScalarSizeInBits() < 16)
14249 return SDValue();
14250
14251 if (N->getOperand(0).getOpcode() != ISD::AND ||
14252 N->getOperand(0).getOperand(0).getOpcode() != ISD::SRL)
14253 return SDValue();
14254
14255 SDValue And = N->getOperand(0);
14256 SDValue Srl = And.getOperand(0);
14257
// V1 = mul constant, V2 = and-mask, V3 = shift amount (V3's extraction line,
// original 14261, is missing from this scrape).
14258 APInt V1, V2, V3;
14259 if (!ISD::isConstantSplatVector(N->getOperand(1).getNode(), V1) ||
14260 !ISD::isConstantSplatVector(And.getOperand(1).getNode(), V2) ||
14262 return SDValue();
14263
// For the i32 case this checks V1 == 0xffff, V2 == 0x10001, V3 == 15.
14264 unsigned HalfSize = VT.getScalarSizeInBits() / 2;
14265 if (!V1.isMask(HalfSize) || V2 != (1ULL | 1ULL << HalfSize) ||
14266 V3 != (HalfSize - 1))
14267 return SDValue();
14268
// Reinterpret as a vector of half-width elements (twice as many lanes) and
// use an arithmetic shift to broadcast each half-element's sign bit.
14269 EVT HalfVT = EVT::getVectorVT(*DAG.getContext(),
14270 EVT::getIntegerVT(*DAG.getContext(), HalfSize),
14271 VT.getVectorElementCount() * 2);
14272 SDLoc DL(N);
14273 SDValue Cast = DAG.getNode(ISD::BITCAST, DL, HalfVT, Srl.getOperand(0));
14274 SDValue Sra = DAG.getNode(ISD::SRA, DL, HalfVT, Cast,
14275 DAG.getConstant(HalfSize - 1, DL, HalfVT));
14276 return DAG.getNode(ISD::BITCAST, DL, VT, Sra);
14277}
14278
// NOTE(review): the signature lines (original 14279-14280) were lost in this
// extraction — presumably performMULCombine(SDNode *N, SelectionDAG &DAG,
// TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget);
// confirm against upstream.
14281 const RISCVSubtarget &Subtarget) {
14282 EVT VT = N->getValueType(0);
// Scalar multiplies go through the shXadd/shift expansion path.
14283 if (!VT.isVector())
14284 return expandMul(N, DAG, DCI, Subtarget);
14285
14286 SDLoc DL(N);
14287 SDValue N0 = N->getOperand(0);
14288 SDValue N1 = N->getOperand(1);
14289 SDValue MulOper;
14290 unsigned AddSubOpc;
14291
14292 // vmadd: (mul (add x, 1), y) -> (add (mul x, y), y)
14293 // (mul x, add (y, 1)) -> (add x, (mul x, y))
14294 // vnmsub: (mul (sub 1, x), y) -> (sub y, (mul x, y))
14295 // (mul x, (sub 1, y)) -> (sub x, (mul x, y))
// Match (add X, 1) or (sub 1, X); on success, sets AddSubOpc and MulOper
// (the non-one operand) as out-params via capture.
14296 auto IsAddSubWith1 = [&](SDValue V) -> bool {
14297 AddSubOpc = V->getOpcode();
14298 if ((AddSubOpc == ISD::ADD || AddSubOpc == ISD::SUB) && V->hasOneUse()) {
14299 SDValue Opnd = V->getOperand(1);
14300 MulOper = V->getOperand(0);
// For SUB, the 1 is the LHS, so swap before testing.
14301 if (AddSubOpc == ISD::SUB)
14302 std::swap(Opnd, MulOper);
14303 if (isOneOrOneSplat(Opnd))
14304 return true;
14305 }
14306 return false;
14307 };
14308
14309 if (IsAddSubWith1(N0)) {
14310 SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N1, MulOper);
14311 return DAG.getNode(AddSubOpc, DL, VT, N1, MulVal);
14312 }
14313
14314 if (IsAddSubWith1(N1)) {
14315 SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N0, MulOper);
14316 return DAG.getNode(AddSubOpc, DL, VT, N0, MulVal);
14317 }
14318
14319 if (SDValue V = combineBinOpOfZExt(N, DAG))
14320 return V;
14321
// NOTE(review): the guarding line for this return (original 14322,
// presumably the mul-to-sra-bitcast combine above) is missing from this
// extraction.
14323 return V;
14324
14325 return SDValue();
14326}
14327
14328/// According to the property that indexed load/store instructions zero-extend
14329/// their indices, try to narrow the type of index operand.
/// \param N [in,out] the index operand; replaced with a narrowed equivalent
/// on success.
/// \returns true if \p N was rewritten to a narrower element type.
14330static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &DAG) {
// Narrowing relies on the hardware zero-extending indices, so signed index
// types cannot be narrowed.
14331 if (isIndexTypeSigned(IndexType))
14332 return false;
14333
// Don't rewrite an index shared with other users.
14334 if (!N->hasOneUse())
14335 return false;
14336
14337 EVT VT = N.getValueType();
14338 SDLoc DL(N);
14339
14340 // In general, what we're doing here is seeing if we can sink a truncate to
14341 // a smaller element type into the expression tree building our index.
14342 // TODO: We can generalize this and handle a bunch more cases if useful.
14343
14344 // Narrow a buildvector to the narrowest element type. This requires less
14345 // work and less register pressure at high LMUL, and creates smaller constants
14346 // which may be cheaper to materialize.
14347 if (ISD::isBuildVectorOfConstantSDNodes(N.getNode())) {
14348 KnownBits Known = DAG.computeKnownBits(N);
// Round up to at least i8 since that's the smallest usable element type.
14349 unsigned ActiveBits = std::max(8u, Known.countMaxActiveBits());
14350 LLVMContext &C = *DAG.getContext();
14351 EVT ResultVT = EVT::getIntegerVT(C, ActiveBits).getRoundIntegerType(C);
14352 if (ResultVT.bitsLT(VT.getVectorElementType())) {
14353 N = DAG.getNode(ISD::TRUNCATE, DL,
14354 VT.changeVectorElementType(ResultVT), N);
14355 return true;
14356 }
14357 }
14358
14359 // Handle the pattern (shl (zext x to ty), C) and bits(x) + C < bits(ty).
14360 if (N.getOpcode() != ISD::SHL)
14361 return false;
14362
// NOTE(review): one condition line (original 14365, presumably also
// accepting RISCVISD::VZEXT_VL) is missing from this extraction.
14363 SDValue N0 = N.getOperand(0);
14364 if (N0.getOpcode() != ISD::ZERO_EXTEND &&
14366 return false;
14367 if (!N0->hasOneUse())
14368 return false;
14369
// The shift amount must be a uniform constant splat.
14370 APInt ShAmt;
14371 SDValue N1 = N.getOperand(1);
14372 if (!ISD::isConstantSplatVector(N1.getNode(), ShAmt))
14373 return false;
14374
// A zext'd value shifted left by ShAmt needs SrcElen + ShAmt bits; round up
// to a power of two and clamp to at least i8.
14375 SDValue Src = N0.getOperand(0);
14376 EVT SrcVT = Src.getValueType();
14377 unsigned SrcElen = SrcVT.getScalarSizeInBits();
14378 unsigned ShAmtV = ShAmt.getZExtValue();
14379 unsigned NewElen = PowerOf2Ceil(SrcElen + ShAmtV);
14380 NewElen = std::max(NewElen, 8U);
14381
14382 // Skip if NewElen is not narrower than the original extended type.
14383 if (NewElen >= N0.getValueType().getScalarSizeInBits())
14384 return false;
14385
14386 EVT NewEltVT = EVT::getIntegerVT(*DAG.getContext(), NewElen);
14387 EVT NewVT = SrcVT.changeVectorElementType(NewEltVT);
14388
// Rebuild the extend and shift at the narrower element type.
14389 SDValue NewExt = DAG.getNode(N0->getOpcode(), DL, NewVT, N0->ops());
14390 SDValue NewShAmtVec = DAG.getConstant(ShAmtV, DL, NewVT);
14391 N = DAG.getNode(ISD::SHL, DL, NewVT, NewExt, NewShAmtVec);
14392 return true;
14393}
14394
14395// Replace (seteq (i64 (and X, 0xffffffff)), C1) with
14396// (seteq (i64 (sext_inreg (X, i32)), C1')) where C1' is C1 sign extended from
14397// bit 31. Same for setne. C1' may be cheaper to materialize and the sext_inreg
14398// can become a sext.w instead of a shift pair.
// NOTE(review): the signature line (original 14399) was lost in this
// extraction — presumably performSETCCCombine(SDNode *N, SelectionDAG &DAG,
// const RISCVSubtarget &Subtarget); confirm against upstream.
14400 const RISCVSubtarget &Subtarget) {
14401 SDValue N0 = N->getOperand(0);
14402 SDValue N1 = N->getOperand(1);
14403 EVT VT = N->getValueType(0);
14404 EVT OpVT = N0.getValueType();
14405
// Only the RV64 i64 compare matters; sext.w only exists there.
14406 if (OpVT != MVT::i64 || !Subtarget.is64Bit())
14407 return SDValue();
14408
14409 // RHS needs to be a constant.
14410 auto *N1C = dyn_cast<ConstantSDNode>(N1);
14411 if (!N1C)
14412 return SDValue();
14413
14414 // LHS needs to be (and X, 0xffffffff).
// NOTE(review): one condition line (original 14416, presumably checking the
// AND's RHS is a ConstantSDNode) is missing from this extraction.
14415 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
14417 N0.getConstantOperandVal(1) != UINT64_C(0xffffffff))
14418 return SDValue();
14419
14420 // Looking for an equality compare.
14421 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
14422 if (!isIntEqualitySetCC(Cond))
14423 return SDValue();
14424
14425 // Don't do this if the sign bit is provably zero, it will be turned back into
14426 // an AND.
14427 APInt SignMask = APInt::getOneBitSet(64, 31);
14428 if (DAG.MaskedValueIsZero(N0.getOperand(0), SignMask))
14429 return SDValue();
14430
14431 const APInt &C1 = N1C->getAPIntValue();
14432
14433 SDLoc dl(N);
14434 // If the constant is larger than 2^32 - 1 it is impossible for both sides
14435 // to be equal.
14436 if (C1.getActiveBits() > 32)
14437 return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
14438
// Compare the sign-extended low 32 bits of X against C1 sign-extended from
// bit 31; equality is preserved because both sides are extended the same way.
14439 SDValue SExtOp = DAG.getNode(ISD::SIGN_EXTEND_INREG, N, OpVT,
14440 N0.getOperand(0), DAG.getValueType(MVT::i32));
14441 return DAG.getSetCC(dl, VT, SExtOp, DAG.getConstant(C1.trunc(32).sext(64),
14442 dl, OpVT), Cond);
14443}
14444
// NOTE(review): the name line of this definition (original 14446) was lost
// in this extraction — presumably performSIGN_EXTEND_INREGCombine(SDNode *N,
// SelectionDAG &DAG, ...); confirm against upstream.
14445 static SDValue
14447 const RISCVSubtarget &Subtarget) {
14448 SDValue Src = N->getOperand(0);
14449 EVT VT = N->getValueType(0);
14450
14451 // Fold (sext_inreg (fmv_x_anyexth X), i16) -> (fmv_x_signexth X)
14452 // Don't do this with Zhinx. We need to explicitly sign extend the GPR.
// bitsGE(MVT::i16): a sext_inreg from i16 or wider is subsumed by the
// sign-extending move.
14453 if (Src.getOpcode() == RISCVISD::FMV_X_ANYEXTH &&
14454 cast<VTSDNode>(N->getOperand(1))->getVT().bitsGE(MVT::i16) &&
14455 Subtarget.hasStdExtZfhmin())
14456 return DAG.getNode(RISCVISD::FMV_X_SIGNEXTH, SDLoc(N), VT,
14457 Src.getOperand(0));
14458
14459 return SDValue();
14460}
14461
14462namespace {
14463// Forward declaration of the structure holding the necessary information to
14464// apply a combine.
14465struct CombineResult;
14466
14467enum ExtKind : uint8_t { ZExt = 1 << 0, SExt = 1 << 1, FPExt = 1 << 2 };
14468/// Helper class for folding sign/zero extensions.
14469/// In particular, this class is used for the following combines:
14470/// add | add_vl | or disjoint -> vwadd(u) | vwadd(u)_w
14471/// sub | sub_vl -> vwsub(u) | vwsub(u)_w
14472/// mul | mul_vl -> vwmul(u) | vwmul_su
14473/// shl | shl_vl -> vwsll
14474/// fadd -> vfwadd | vfwadd_w
14475/// fsub -> vfwsub | vfwsub_w
14476/// fmul -> vfwmul
14477/// An object of this class represents an operand of the operation we want to
14478/// combine.
14479/// E.g., when trying to combine `mul_vl a, b`, we will have one instance of
14480/// NodeExtensionHelper for `a` and one for `b`.
14481///
14482/// This class abstracts away how the extension is materialized and
14483/// how its number of users affect the combines.
14484///
14485/// In particular:
14486/// - VWADD_W is conceptually == add(op0, sext(op1))
14487/// - VWADDU_W == add(op0, zext(op1))
14488/// - VWSUB_W == sub(op0, sext(op1))
14489/// - VWSUBU_W == sub(op0, zext(op1))
14490/// - VFWADD_W == fadd(op0, fpext(op1))
14491/// - VFWSUB_W == fsub(op0, fpext(op1))
14492/// And VMV_V_X_VL, depending on the value, is conceptually equivalent to
14493/// zext|sext(smaller_value).
14494struct NodeExtensionHelper {
14495 /// Records if this operand is like being zero extended.
14496 bool SupportsZExt;
14497 /// Records if this operand is like being sign extended.
14498 /// Note: SupportsZExt and SupportsSExt are not mutually exclusive. For
14499 /// instance, a splat constant (e.g., 3), would support being both sign and
14500 /// zero extended.
14501 bool SupportsSExt;
14502 /// Records if this operand is like being floating-Point extended.
14503 bool SupportsFPExt;
14504 /// This boolean captures whether we care if this operand would still be
14505 /// around after the folding happens.
14506 bool EnforceOneUse;
14507 /// Original value that this NodeExtensionHelper represents.
14508 SDValue OrigOperand;
14509
14510 /// Get the value feeding the extension or the value itself.
14511 /// E.g., for zext(a), this would return a.
14512 SDValue getSource() const {
14513 switch (OrigOperand.getOpcode()) {
14514 case ISD::ZERO_EXTEND:
14515 case ISD::SIGN_EXTEND:
14516 case RISCVISD::VSEXT_VL:
14517 case RISCVISD::VZEXT_VL:
14519 return OrigOperand.getOperand(0);
14520 default:
14521 return OrigOperand;
14522 }
14523 }
14524
14525 /// Check if this instance represents a splat.
// Covers both the VL-predicated splat (VMV_V_X_VL) and the generic
// SPLAT_VECTOR form.
14526 bool isSplat() const {
14527 return OrigOperand.getOpcode() == RISCVISD::VMV_V_X_VL ||
14528 OrigOperand.getOpcode() == ISD::SPLAT_VECTOR;
14529 }
14530
14531 /// Get the extended opcode.
14532 unsigned getExtOpc(ExtKind SupportsExt) const {
14533 switch (SupportsExt) {
14534 case ExtKind::SExt:
14535 return RISCVISD::VSEXT_VL;
14536 case ExtKind::ZExt:
14537 return RISCVISD::VZEXT_VL;
14538 case ExtKind::FPExt:
14540 }
14541 llvm_unreachable("Unknown ExtKind enum");
14542 }
14543
14544 /// Get or create a value that can feed \p Root with the given extension \p
14545 /// SupportsExt. If \p SExt is std::nullopt, this returns the source of this
14546 /// operand. \see ::getSource().
14547 SDValue getOrCreateExtendedOp(SDNode *Root, SelectionDAG &DAG,
14548 const RISCVSubtarget &Subtarget,
14549 std::optional<ExtKind> SupportsExt) const {
14550 if (!SupportsExt.has_value())
14551 return OrigOperand;
14552
14553 MVT NarrowVT = getNarrowType(Root, *SupportsExt);
14554
14555 SDValue Source = getSource();
14556 assert(Subtarget.getTargetLowering()->isTypeLegal(Source.getValueType()));
14557 if (Source.getValueType() == NarrowVT)
14558 return Source;
14559
14560 // vfmadd_vl -> vfwmadd_vl can take bf16 operands
14561 if (Source.getValueType().getVectorElementType() == MVT::bf16) {
14562 assert(Root->getSimpleValueType(0).getVectorElementType() == MVT::f32 &&
14563 Root->getOpcode() == RISCVISD::VFMADD_VL);
14564 return Source;
14565 }
14566
14567 unsigned ExtOpc = getExtOpc(*SupportsExt);
14568
14569 // If we need an extension, we should be changing the type.
14570 SDLoc DL(OrigOperand);
14571 auto [Mask, VL] = getMaskAndVL(Root, DAG, Subtarget);
14572 switch (OrigOperand.getOpcode()) {
14573 case ISD::ZERO_EXTEND:
14574 case ISD::SIGN_EXTEND:
14575 case RISCVISD::VSEXT_VL:
14576 case RISCVISD::VZEXT_VL:
14578 return DAG.getNode(ExtOpc, DL, NarrowVT, Source, Mask, VL);
14579 case ISD::SPLAT_VECTOR:
14580 return DAG.getSplat(NarrowVT, DL, Source.getOperand(0));
14582 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, NarrowVT,
14583 DAG.getUNDEF(NarrowVT), Source.getOperand(1), VL);
14585 Source = Source.getOperand(1);
14586 assert(Source.getOpcode() == ISD::FP_EXTEND && "Unexpected source");
14587 Source = Source.getOperand(0);
14588 assert(Source.getValueType() == NarrowVT.getVectorElementType());
14589 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, NarrowVT,
14590 DAG.getUNDEF(NarrowVT), Source, VL);
14591 default:
14592 // Other opcodes can only come from the original LHS of VW(ADD|SUB)_W_VL
14593 // and that operand should already have the right NarrowVT so no
14594 // extension should be required at this point.
14595 llvm_unreachable("Unsupported opcode");
14596 }
14597 }
14598
14599 /// Helper function to get the narrow type for \p Root.
14600 /// The narrow type is the type of \p Root where we divided the size of each
14601 /// element by 2. E.g., if Root's type <2xi16> -> narrow type <2xi8>.
14602 /// \pre Both the narrow type and the original type should be legal.
14603 static MVT getNarrowType(const SDNode *Root, ExtKind SupportsExt) {
14604 MVT VT = Root->getSimpleValueType(0);
14605
14606 // Determine the narrow size.
14607 unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
14608
// FP extensions narrow to a float type (e.g. f32 from f64); integer
// extensions narrow to an integer type of half the width.
14609 MVT EltVT = SupportsExt == ExtKind::FPExt
14610 ? MVT::getFloatingPointVT(NarrowSize)
14611 : MVT::getIntegerVT(NarrowSize);
14612
// Smallest representable narrow elements: f16 for FP, i8 for integers.
14613 assert((int)NarrowSize >= (SupportsExt == ExtKind::FPExt ? 16 : 8) &&
14614 "Trying to extend something we can't represent");
14615 MVT NarrowVT = MVT::getVectorVT(EltVT, VT.getVectorElementCount());
14616 return NarrowVT;
14617 }
14618
14619 /// Get the opcode to materialize:
14620 /// Opcode(sext(a), sext(b)) -> newOpcode(a, b)
14621 static unsigned getSExtOpcode(unsigned Opcode) {
14622 switch (Opcode) {
14623 case ISD::ADD:
14624 case RISCVISD::ADD_VL:
14627 case ISD::OR:
14628 return RISCVISD::VWADD_VL;
14629 case ISD::SUB:
14630 case RISCVISD::SUB_VL:
14633 return RISCVISD::VWSUB_VL;
14634 case ISD::MUL:
14635 case RISCVISD::MUL_VL:
14636 return RISCVISD::VWMUL_VL;
14637 default:
14638 llvm_unreachable("Unexpected opcode");
14639 }
14640 }
14641
14642 /// Get the opcode to materialize:
14643 /// Opcode(zext(a), zext(b)) -> newOpcode(a, b)
14644 static unsigned getZExtOpcode(unsigned Opcode) {
14645 switch (Opcode) {
14646 case ISD::ADD:
14647 case RISCVISD::ADD_VL:
14650 case ISD::OR:
14651 return RISCVISD::VWADDU_VL;
14652 case ISD::SUB:
14653 case RISCVISD::SUB_VL:
14656 return RISCVISD::VWSUBU_VL;
14657 case ISD::MUL:
14658 case RISCVISD::MUL_VL:
14659 return RISCVISD::VWMULU_VL;
14660 case ISD::SHL:
14661 case RISCVISD::SHL_VL:
14662 return RISCVISD::VWSLL_VL;
14663 default:
14664 llvm_unreachable("Unexpected opcode");
14665 }
14666 }
14667
14668 /// Get the opcode to materialize:
14669 /// Opcode(fpext(a), fpext(b)) -> newOpcode(a, b)
14670 static unsigned getFPExtOpcode(unsigned Opcode) {
14671 switch (Opcode) {
14672 case RISCVISD::FADD_VL:
14674 return RISCVISD::VFWADD_VL;
14675 case RISCVISD::FSUB_VL:
14677 return RISCVISD::VFWSUB_VL;
14678 case RISCVISD::FMUL_VL:
14679 return RISCVISD::VFWMUL_VL;
14681 return RISCVISD::VFWMADD_VL;
14683 return RISCVISD::VFWMSUB_VL;
14685 return RISCVISD::VFWNMADD_VL;
14687 return RISCVISD::VFWNMSUB_VL;
14688 default:
14689 llvm_unreachable("Unexpected opcode");
14690 }
14691 }
14692
14693 /// Get the opcode to materialize \p Opcode(sext(a), zext(b)) ->
14694 /// newOpcode(a, b).
// Only multiply has a mixed-signedness widening form (vwmulsu).
14695 static unsigned getSUOpcode(unsigned Opcode) {
14696 assert((Opcode == RISCVISD::MUL_VL || Opcode == ISD::MUL) &&
14697 "SU is only supported for MUL");
14698 return RISCVISD::VWMULSU_VL;
14699 }
14700
  /// Get the opcode to materialize
  /// \p Opcode(a, s|z|fpext(b)) -> newOpcode(a, b).
  static unsigned getWOpcode(unsigned Opcode, ExtKind SupportsExt) {
    switch (Opcode) {
    // Only one operand is extended, so the integer forms pick the signed or
    // unsigned .w variant from the requested extension kind.
    case ISD::ADD:
    case RISCVISD::ADD_VL:
    case ISD::OR:
      return SupportsExt == ExtKind::SExt ? RISCVISD::VWADD_W_VL
    case ISD::SUB:
    case RISCVISD::SUB_VL:
      return SupportsExt == ExtKind::SExt ? RISCVISD::VWSUB_W_VL
    // FP has a single .w form; SupportsExt must be FPExt for these roots.
    case RISCVISD::FADD_VL:
      return RISCVISD::VFWADD_W_VL;
    case RISCVISD::FSUB_VL:
      return RISCVISD::VFWSUB_W_VL;
    default:
      llvm_unreachable("Unexpected opcode");
    }
  }
14722
  /// Signature of a matcher callback: given a root node and the helpers for
  /// its two operands, return the CombineResult to materialize, or
  /// std::nullopt when the pattern does not apply.
  using CombineToTry = std::function<std::optional<CombineResult>(
      SDNode * /*Root*/, const NodeExtensionHelper & /*LHS*/,
      const NodeExtensionHelper & /*RHS*/, SelectionDAG &,
      const RISCVSubtarget &)>;
14727
  /// Check if this node needs to be fully folded or extended for all users.
  /// When true, folding this operand into one root is only legal if every
  /// other user can be folded as well (the old extend would otherwise stay
  /// live alongside the widened op).
  bool needToPromoteOtherUsers() const { return EnforceOneUse; }
14730
  /// Record which widening extensions a splat source (SPLAT_VECTOR or
  /// VMV_V_X_VL) can provide, based on the known sign/zero bits of the
  /// splatted scalar.
  void fillUpExtensionSupportForSplat(SDNode *Root, SelectionDAG &DAG,
                                      const RISCVSubtarget &Subtarget) {
    unsigned Opc = OrigOperand.getOpcode();
    MVT VT = OrigOperand.getSimpleValueType();

    assert((Opc == ISD::SPLAT_VECTOR || Opc == RISCVISD::VMV_V_X_VL) &&
           "Unexpected Opcode");

    // The passthru must be undef for tail agnostic.
    if (Opc == RISCVISD::VMV_V_X_VL && !OrigOperand.getOperand(0).isUndef())
      return;

    // Get the scalar value. SPLAT_VECTOR holds it at operand 0,
    // VMV_V_X_VL at operand 1 (operand 0 is the passthru).
    SDValue Op = Opc == ISD::SPLAT_VECTOR ? OrigOperand.getOperand(0)
                                          : OrigOperand.getOperand(1);

    // See if we have enough sign bits or zero bits in the scalar to use a
    // widening opcode by splatting to smaller element size.
    unsigned EltBits = VT.getScalarSizeInBits();
    unsigned ScalarBits = Op.getValueSizeInBits();
    // If we're not getting all bits from the element, we need special handling.
    if (ScalarBits < EltBits) {
      // This should only occur on RV32.
      assert(Opc == RISCVISD::VMV_V_X_VL && EltBits == 64 && ScalarBits == 32 &&
             !Subtarget.is64Bit() && "Unexpected splat");
      // vmv.v.x sign extends narrow inputs.
      SupportsSExt = true;

      // If the input is positive, then sign extend is also zero extend.
      if (DAG.SignBitIsZero(Op))
        SupportsZExt = true;

      // A splat carries no extend node to remove, so no one-use requirement.
      EnforceOneUse = false;
      return;
    }

    unsigned NarrowSize = EltBits / 2;
    // If the narrow type cannot be expressed with a legal VMV,
    // this is not a valid candidate.
    if (NarrowSize < 8)
      return;

    if (DAG.ComputeMaxSignificantBits(Op) <= NarrowSize)
      SupportsSExt = true;

    if (DAG.MaskedValueIsZero(Op,
                              APInt::getBitsSetFrom(ScalarBits, NarrowSize)))
      SupportsZExt = true;

    EnforceOneUse = false;
  }
14782
14783 bool isSupportedFPExtend(SDNode *Root, MVT NarrowEltVT,
14784 const RISCVSubtarget &Subtarget) {
14785 // Any f16 extension will neeed zvfh
14786 if (NarrowEltVT == MVT::f16 && !Subtarget.hasVInstructionsF16())
14787 return false;
14788 // The only bf16 extension we can do is vfmadd_vl -> vfwmadd_vl with
14789 // zvfbfwma
14790 if (NarrowEltVT == MVT::bf16 && (!Subtarget.hasStdExtZvfbfwma() ||
14791 Root->getOpcode() != RISCVISD::VFMADD_VL))
14792 return false;
14793 return true;
14794 }
14795
  /// Helper method to set the various fields of this struct based on the
  /// type of \p Root.
  void fillUpExtensionSupport(SDNode *Root, SelectionDAG &DAG,
                              const RISCVSubtarget &Subtarget) {
    SupportsZExt = false;
    SupportsSExt = false;
    SupportsFPExt = false;
    EnforceOneUse = true;
    unsigned Opc = OrigOperand.getOpcode();
    // For the nodes we handle below, we end up using their inputs directly: see
    // getSource(). However since they either don't have a passthru or we check
    // that their passthru is undef, we can safely ignore their mask and VL.
    switch (Opc) {
    case ISD::ZERO_EXTEND:
    case ISD::SIGN_EXTEND: {
      MVT VT = OrigOperand.getSimpleValueType();
      // Scalar extends can show up here too; only vector ones are usable.
      if (!VT.isVector())
        break;

      SDValue NarrowElt = OrigOperand.getOperand(0);
      MVT NarrowVT = NarrowElt.getSimpleValueType();
      // i1 types are legal but we can't select V{S,Z}EXT_VLs with them.
      if (NarrowVT.getVectorElementType() == MVT::i1)
        break;

      SupportsZExt = Opc == ISD::ZERO_EXTEND;
      SupportsSExt = Opc == ISD::SIGN_EXTEND;
      break;
    }
    case RISCVISD::VZEXT_VL:
      SupportsZExt = true;
      break;
    case RISCVISD::VSEXT_VL:
      SupportsSExt = true;
      break;
      MVT NarrowEltVT =
      if (!isSupportedFPExtend(Root, NarrowEltVT, Subtarget))
        break;
      SupportsFPExt = true;
      break;
    }
    case ISD::SPLAT_VECTOR:
      fillUpExtensionSupportForSplat(Root, DAG, Subtarget);
      break;
    case RISCVISD::VFMV_V_F_VL: {
      MVT VT = OrigOperand.getSimpleValueType();

      // The passthru must be undef, as for the splat case above.
      if (!OrigOperand.getOperand(0).isUndef())
        break;

      SDValue Op = OrigOperand.getOperand(1);
      if (Op.getOpcode() != ISD::FP_EXTEND)
        break;

      if (!isSupportedFPExtend(Root, Op.getOperand(0).getSimpleValueType(),
                               Subtarget))
        break;

      // Only a single halving step is foldable into the widening op.
      unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
      unsigned ScalarBits = Op.getOperand(0).getValueSizeInBits();
      if (NarrowSize != ScalarBits)
        break;

      SupportsFPExt = true;
      break;
    }
    default:
      break;
    }
  }
14869
  /// Check if \p Root supports any extension folding combines.
  static bool isSupportedRoot(const SDNode *Root,
                              const RISCVSubtarget &Subtarget) {
    switch (Root->getOpcode()) {
    case ISD::ADD:
    case ISD::SUB:
    case ISD::MUL: {
      // Generic nodes are only handled for scalable vectors here;
      // fixed-length vectors go through the *_VL forms.
      return Root->getValueType(0).isScalableVector();
    }
    case ISD::OR: {
      // An OR only behaves like an ADD when the operands share no set bits.
      return Root->getValueType(0).isScalableVector() &&
             Root->getFlags().hasDisjoint();
    }
    // Vector Widening Integer Add/Sub/Mul Instructions
    case RISCVISD::ADD_VL:
    case RISCVISD::MUL_VL:
    case RISCVISD::SUB_VL:
    // Vector Widening Floating-Point Add/Sub/Mul Instructions
    case RISCVISD::FADD_VL:
    case RISCVISD::FSUB_VL:
    case RISCVISD::FMUL_VL:
      return true;
    // vwsll needs the Zvbb extension.
    case ISD::SHL:
      return Root->getValueType(0).isScalableVector() &&
             Subtarget.hasStdExtZvbb();
    case RISCVISD::SHL_VL:
      return Subtarget.hasStdExtZvbb();
      return true;
    default:
      return false;
    }
  }
14912
  /// Build a NodeExtensionHelper for \p Root.getOperand(\p OperandIdx).
  NodeExtensionHelper(SDNode *Root, unsigned OperandIdx, SelectionDAG &DAG,
                      const RISCVSubtarget &Subtarget) {
    assert(isSupportedRoot(Root, Subtarget) &&
           "Trying to build an helper with an "
           "unsupported root");
    assert(OperandIdx < 2 && "Requesting something else than LHS or RHS");
    OrigOperand = Root->getOperand(OperandIdx);

    unsigned Opc = Root->getOpcode();
    switch (Opc) {
    // We consider
    // VW<ADD|SUB>_W(LHS, RHS) -> <ADD|SUB>(LHS, SEXT(RHS))
    // VW<ADD|SUB>U_W(LHS, RHS) -> <ADD|SUB>(LHS, ZEXT(RHS))
    // VFW<ADD|SUB>_W(LHS, RHS) -> F<ADD|SUB>(LHS, FPEXT(RHS))
      if (OperandIdx == 1) {
        // For a .w root, RHS is implicitly extended by the instruction's
        // semantics; derive the kind from the root opcode itself.
        SupportsZExt =
        SupportsSExt =
        SupportsFPExt =
        // There's no existing extension here, so we don't have to worry about
        // making sure it gets removed.
        EnforceOneUse = false;
        break;
      }
      [[fallthrough]];
    default:
      // Operand 0 of a .w node, and both operands of all other roots, are
      // classified by inspecting the operand node itself.
      fillUpExtensionSupport(Root, DAG, Subtarget);
      break;
    }
  }
14953
14954 /// Helper function to get the Mask and VL from \p Root.
14955 static std::pair<SDValue, SDValue>
14956 getMaskAndVL(const SDNode *Root, SelectionDAG &DAG,
14957 const RISCVSubtarget &Subtarget) {
14958 assert(isSupportedRoot(Root, Subtarget) && "Unexpected root");
14959 switch (Root->getOpcode()) {
14960 case ISD::ADD:
14961 case ISD::SUB:
14962 case ISD::MUL:
14963 case ISD::OR:
14964 case ISD::SHL: {
14965 SDLoc DL(Root);
14966 MVT VT = Root->getSimpleValueType(0);
14967 return getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
14968 }
14969 default:
14970 return std::make_pair(Root->getOperand(3), Root->getOperand(4));
14971 }
14972 }
14973
  /// Helper function to check if \p N is commutative with respect to the
  /// foldings that are supported by this class.
  static bool isCommutative(const SDNode *N) {
    switch (N->getOpcode()) {
    case ISD::ADD:
    case ISD::MUL:
    case ISD::OR:
    case RISCVISD::ADD_VL:
    case RISCVISD::MUL_VL:
    case RISCVISD::FADD_VL:
    case RISCVISD::FMUL_VL:
      return true;
    // Subtraction and shifts are order-sensitive, so the matcher must not
    // swap their operands.
    case ISD::SUB:
    case RISCVISD::SUB_VL:
    case RISCVISD::FSUB_VL:
    case ISD::SHL:
    case RISCVISD::SHL_VL:
      return false;
    default:
      llvm_unreachable("Unexpected opcode");
    }
  }
15006
  /// Get a list of combines to try for folding extensions in \p Root.
  /// Note that each returned CombineToTry function doesn't actually modify
  /// anything. Instead they produce an optional CombineResult that, if not
  /// None, needs to be materialized for the combine to be applied.
  /// \see CombineResult::materialize.
  /// If the related CombineToTry function returns std::nullopt, that means the
  /// combine didn't match.
  static SmallVector<CombineToTry> getSupportedFoldings(const SDNode *Root);
};
15016
15017/// Helper structure that holds all the necessary information to materialize a
15018/// combine that does some extension folding.
15019struct CombineResult {
15020 /// Opcode to be generated when materializing the combine.
15021 unsigned TargetOpcode;
15022 // No value means no extension is needed.
15023 std::optional<ExtKind> LHSExt;
15024 std::optional<ExtKind> RHSExt;
15025 /// Root of the combine.
15026 SDNode *Root;
15027 /// LHS of the TargetOpcode.
15028 NodeExtensionHelper LHS;
15029 /// RHS of the TargetOpcode.
15030 NodeExtensionHelper RHS;
15031
15032 CombineResult(unsigned TargetOpcode, SDNode *Root,
15033 const NodeExtensionHelper &LHS, std::optional<ExtKind> LHSExt,
15034 const NodeExtensionHelper &RHS, std::optional<ExtKind> RHSExt)
15035 : TargetOpcode(TargetOpcode), LHSExt(LHSExt), RHSExt(RHSExt), Root(Root),
15036 LHS(LHS), RHS(RHS) {}
15037
15038 /// Return a value that uses TargetOpcode and that can be used to replace
15039 /// Root.
15040 /// The actual replacement is *not* done in that method.
15041 SDValue materialize(SelectionDAG &DAG,
15042 const RISCVSubtarget &Subtarget) const {
15043 SDValue Mask, VL, Passthru;
15044 std::tie(Mask, VL) =
15045 NodeExtensionHelper::getMaskAndVL(Root, DAG, Subtarget);
15046 switch (Root->getOpcode()) {
15047 default:
15048 Passthru = Root->getOperand(2);
15049 break;
15050 case ISD::ADD:
15051 case ISD::SUB:
15052 case ISD::MUL:
15053 case ISD::OR:
15054 case ISD::SHL:
15055 Passthru = DAG.getUNDEF(Root->getValueType(0));
15056 break;
15057 }
15058 return DAG.getNode(TargetOpcode, SDLoc(Root), Root->getValueType(0),
15059 LHS.getOrCreateExtendedOp(Root, DAG, Subtarget, LHSExt),
15060 RHS.getOrCreateExtendedOp(Root, DAG, Subtarget, RHSExt),
15061 Passthru, Mask, VL);
15062 }
15063};
15064
15065/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
15066/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
15067/// are zext) and LHS and RHS can be folded into Root.
15068/// AllowExtMask define which form `ext` can take in this pattern.
15069///
15070/// \note If the pattern can match with both zext and sext, the returned
15071/// CombineResult will feature the zext result.
15072///
15073/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
15074/// can be used to apply the pattern.
15075static std::optional<CombineResult>
15076canFoldToVWWithSameExtensionImpl(SDNode *Root, const NodeExtensionHelper &LHS,
15077 const NodeExtensionHelper &RHS,
15078 uint8_t AllowExtMask, SelectionDAG &DAG,
15079 const RISCVSubtarget &Subtarget) {
15080 if ((AllowExtMask & ExtKind::ZExt) && LHS.SupportsZExt && RHS.SupportsZExt)
15081 return CombineResult(NodeExtensionHelper::getZExtOpcode(Root->getOpcode()),
15082 Root, LHS, /*LHSExt=*/{ExtKind::ZExt}, RHS,
15083 /*RHSExt=*/{ExtKind::ZExt});
15084 if ((AllowExtMask & ExtKind::SExt) && LHS.SupportsSExt && RHS.SupportsSExt)
15085 return CombineResult(NodeExtensionHelper::getSExtOpcode(Root->getOpcode()),
15086 Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
15087 /*RHSExt=*/{ExtKind::SExt});
15088 if ((AllowExtMask & ExtKind::FPExt) && LHS.SupportsFPExt && RHS.SupportsFPExt)
15089 return CombineResult(NodeExtensionHelper::getFPExtOpcode(Root->getOpcode()),
15090 Root, LHS, /*LHSExt=*/{ExtKind::FPExt}, RHS,
15091 /*RHSExt=*/{ExtKind::FPExt});
15092 return std::nullopt;
15093}
15094
15095/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
15096/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
15097/// are zext) and LHS and RHS can be folded into Root.
15098///
15099/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
15100/// can be used to apply the pattern.
15101static std::optional<CombineResult>
15102canFoldToVWWithSameExtension(SDNode *Root, const NodeExtensionHelper &LHS,
15103 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
15104 const RISCVSubtarget &Subtarget) {
15105 return canFoldToVWWithSameExtensionImpl(
15106 Root, LHS, RHS, ExtKind::ZExt | ExtKind::SExt | ExtKind::FPExt, DAG,
15107 Subtarget);
15108}
15109
15110/// Check if \p Root follows a pattern Root(LHS, ext(RHS))
15111///
15112/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
15113/// can be used to apply the pattern.
15114static std::optional<CombineResult>
15115canFoldToVW_W(SDNode *Root, const NodeExtensionHelper &LHS,
15116 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
15117 const RISCVSubtarget &Subtarget) {
15118 if (RHS.SupportsFPExt)
15119 return CombineResult(
15120 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::FPExt),
15121 Root, LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::FPExt});
15122
15123 // FIXME: Is it useful to form a vwadd.wx or vwsub.wx if it removes a scalar
15124 // sext/zext?
15125 // Control this behavior behind an option (AllowSplatInVW_W) for testing
15126 // purposes.
15127 if (RHS.SupportsZExt && (!RHS.isSplat() || AllowSplatInVW_W))
15128 return CombineResult(
15129 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::ZExt), Root,
15130 LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::ZExt});
15131 if (RHS.SupportsSExt && (!RHS.isSplat() || AllowSplatInVW_W))
15132 return CombineResult(
15133 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::SExt), Root,
15134 LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::SExt});
15135 return std::nullopt;
15136}
15137
15138/// Check if \p Root follows a pattern Root(sext(LHS), sext(RHS))
15139///
15140/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
15141/// can be used to apply the pattern.
15142static std::optional<CombineResult>
15143canFoldToVWWithSEXT(SDNode *Root, const NodeExtensionHelper &LHS,
15144 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
15145 const RISCVSubtarget &Subtarget) {
15146 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::SExt, DAG,
15147 Subtarget);
15148}
15149
15150/// Check if \p Root follows a pattern Root(zext(LHS), zext(RHS))
15151///
15152/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
15153/// can be used to apply the pattern.
15154static std::optional<CombineResult>
15155canFoldToVWWithZEXT(SDNode *Root, const NodeExtensionHelper &LHS,
15156 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
15157 const RISCVSubtarget &Subtarget) {
15158 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::ZExt, DAG,
15159 Subtarget);
15160}
15161
15162/// Check if \p Root follows a pattern Root(fpext(LHS), fpext(RHS))
15163///
15164/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
15165/// can be used to apply the pattern.
15166static std::optional<CombineResult>
15167canFoldToVWWithFPEXT(SDNode *Root, const NodeExtensionHelper &LHS,
15168 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
15169 const RISCVSubtarget &Subtarget) {
15170 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::FPExt, DAG,
15171 Subtarget);
15172}
15173
15174/// Check if \p Root follows a pattern Root(sext(LHS), zext(RHS))
15175///
15176/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
15177/// can be used to apply the pattern.
15178static std::optional<CombineResult>
15179canFoldToVW_SU(SDNode *Root, const NodeExtensionHelper &LHS,
15180 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
15181 const RISCVSubtarget &Subtarget) {
15182
15183 if (!LHS.SupportsSExt || !RHS.SupportsZExt)
15184 return std::nullopt;
15185 return CombineResult(NodeExtensionHelper::getSUOpcode(Root->getOpcode()),
15186 Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
15187 /*RHSExt=*/{ExtKind::ZExt});
15188}
15189
NodeExtensionHelper::getSupportedFoldings(const SDNode *Root) {
  SmallVector<CombineToTry> Strategies;
  switch (Root->getOpcode()) {
  case ISD::ADD:
  case ISD::SUB:
  case ISD::OR:
  case RISCVISD::ADD_VL:
  case RISCVISD::SUB_VL:
  case RISCVISD::FADD_VL:
  case RISCVISD::FSUB_VL:
    // add|sub|fadd|fsub -> vwadd(u)|vwsub(u)|vfwadd|vfwsub
    Strategies.push_back(canFoldToVWWithSameExtension);
    // add|sub|fadd|fsub -> vwadd(u)_w|vwsub(u)_w|vfwadd_w|vfwsub_w
    Strategies.push_back(canFoldToVW_W);
    break;
  case RISCVISD::FMUL_VL:
    // fmul (and the FMA family) fold only when both operands extend.
    Strategies.push_back(canFoldToVWWithSameExtension);
    break;
  case ISD::MUL:
  case RISCVISD::MUL_VL:
    // mul -> vwmul(u)
    Strategies.push_back(canFoldToVWWithSameExtension);
    // mul -> vwmulsu
    Strategies.push_back(canFoldToVW_SU);
    break;
  case ISD::SHL:
  case RISCVISD::SHL_VL:
    // shl -> vwsll
    Strategies.push_back(canFoldToVWWithZEXT);
    break;
    // vwadd_w|vwsub_w -> vwadd|vwsub
    Strategies.push_back(canFoldToVWWithSEXT);
    break;
    // vwaddu_w|vwsubu_w -> vwaddu|vwsubu
    Strategies.push_back(canFoldToVWWithZEXT);
    break;
    // vfwadd_w|vfwsub_w -> vfwadd|vfwsub
    Strategies.push_back(canFoldToVWWithFPEXT);
    break;
  default:
    llvm_unreachable("Unexpected opcode");
  }
  return Strategies;
}
15245} // End anonymous namespace.
15246
/// Combine a binary or FMA operation to its equivalent VW or VW_W form.
/// The supported combines are:
/// add | add_vl | or disjoint -> vwadd(u) | vwadd(u)_w
/// sub | sub_vl -> vwsub(u) | vwsub(u)_w
/// mul | mul_vl -> vwmul(u) | vwmul_su
/// shl | shl_vl -> vwsll
/// fadd_vl ->  vfwadd | vfwadd_w
/// fsub_vl -> vfwsub | vfwsub_w
/// fmul_vl -> vfwmul
/// vwadd_w(u) -> vwadd(u)
/// vwsub_w(u) -> vwsub(u)
/// vfwadd_w -> vfwadd
/// vfwsub_w -> vfwsub
                                     const RISCVSubtarget &Subtarget) {
  SelectionDAG &DAG = DCI.DAG;
  if (DCI.isBeforeLegalize())
    return SDValue();

  if (!NodeExtensionHelper::isSupportedRoot(N, Subtarget))
    return SDValue();

  SmallVector<SDNode *> Worklist;
  SmallSet<SDNode *, 8> Inserted;
  Worklist.push_back(N);
  Inserted.insert(N);
  SmallVector<CombineResult> CombinesToApply;

  while (!Worklist.empty()) {
    SDNode *Root = Worklist.pop_back_val();

    NodeExtensionHelper LHS(Root, 0, DAG, Subtarget);
    NodeExtensionHelper RHS(Root, 1, DAG, Subtarget);
    // When an operand's extend must be removed from every user, enqueue
    // those users so they get folded too; fail if any user can't be.
    auto AppendUsersIfNeeded = [&Worklist, &Subtarget,
                                &Inserted](const NodeExtensionHelper &Op) {
      if (Op.needToPromoteOtherUsers()) {
        for (SDNode::use_iterator UI = Op.OrigOperand->use_begin(),
                                  UE = Op.OrigOperand->use_end();
             UI != UE; ++UI) {
          SDNode *TheUse = *UI;
          if (!NodeExtensionHelper::isSupportedRoot(TheUse, Subtarget))
            return false;
          // We only support the first 2 operands of FMA.
          if (UI.getOperandNo() >= 2)
            return false;
          if (Inserted.insert(TheUse).second)
            Worklist.push_back(TheUse);
        }
      }
      return true;
    };

    // Control the compile time by limiting the number of nodes we look at in
    // total.
    if (Inserted.size() > ExtensionMaxWebSize)
      return SDValue();

        NodeExtensionHelper::getSupportedFoldings(Root);

    assert(!FoldingStrategies.empty() && "Nothing to be folded");
    bool Matched = false;
    // For commutative roots, retry once with the operands swapped.
    for (int Attempt = 0;
         (Attempt != 1 + NodeExtensionHelper::isCommutative(Root)) && !Matched;
         ++Attempt) {

      for (NodeExtensionHelper::CombineToTry FoldingStrategy :
           FoldingStrategies) {
        std::optional<CombineResult> Res =
            FoldingStrategy(Root, LHS, RHS, DAG, Subtarget);
        if (Res) {
          Matched = true;
          CombinesToApply.push_back(*Res);
          // All the inputs that are extended need to be folded, otherwise
          // we would be leaving the old input (since it may still be used),
          // and the new one.
          if (Res->LHSExt.has_value())
            if (!AppendUsersIfNeeded(LHS))
              return SDValue();
          if (Res->RHSExt.has_value())
            if (!AppendUsersIfNeeded(RHS))
              return SDValue();
          break;
        }
      }
      std::swap(LHS, RHS);
    }
    // Right now we do an all or nothing approach.
    if (!Matched)
      return SDValue();
  }
  // Store the value for the replacement of the input node separately.
  SDValue InputRootReplacement;
  // We do the RAUW after we materialize all the combines, because some replaced
  // nodes may be feeding some of the yet-to-be-replaced nodes. Put differently,
  // some of these nodes may appear in the NodeExtensionHelpers of some of the
  // yet-to-be-visited CombinesToApply roots.
  ValuesToReplace.reserve(CombinesToApply.size());
  for (CombineResult Res : CombinesToApply) {
    SDValue NewValue = Res.materialize(DAG, Subtarget);
    if (!InputRootReplacement) {
      assert(Res.Root == N &&
             "First element is expected to be the current node");
      InputRootReplacement = NewValue;
    } else {
      ValuesToReplace.emplace_back(SDValue(Res.Root, 0), NewValue);
    }
  }
  for (std::pair<SDValue, SDValue> OldNewValues : ValuesToReplace) {
    DAG.ReplaceAllUsesOfValueWith(OldNewValues.first, OldNewValues.second);
    DCI.AddToWorklist(OldNewValues.second.getNode());
  }
  return InputRootReplacement;
}
15363
// Fold (vwadd(u).wv y, (vmerge cond, x, 0)) -> vwadd(u).wv y, x, y, cond
//      (vwsub(u).wv y, (vmerge cond, x, 0)) -> vwsub(u).wv y, x, y, cond
// y will be the Passthru and cond will be the Mask.
  unsigned Opc = N->getOpcode();

  SDValue Y = N->getOperand(0);
  SDValue MergeOp = N->getOperand(1);
  unsigned MergeOpc = MergeOp.getOpcode();

  if (MergeOpc != RISCVISD::VMERGE_VL && MergeOpc != ISD::VSELECT)
    return SDValue();

  SDValue X = MergeOp->getOperand(1);

  // The merge must disappear entirely, or we'd keep both forms alive.
  if (!MergeOp.hasOneUse())
    return SDValue();

  // Passthru should be undef
  SDValue Passthru = N->getOperand(2);
  if (!Passthru.isUndef())
    return SDValue();

  // Mask should be all ones
  SDValue Mask = N->getOperand(3);
  if (Mask.getOpcode() != RISCVISD::VMSET_VL)
    return SDValue();

  // False value of MergeOp should be all zeros
  SDValue Z = MergeOp->getOperand(2);

  // Look through a zero (or undef) inserted into a larger zero vector.
  if (Z.getOpcode() == ISD::INSERT_SUBVECTOR &&
      (isNullOrNullSplat(Z.getOperand(0)) || Z.getOperand(0).isUndef()))
    Z = Z.getOperand(1);

  if (!ISD::isConstantSplatVectorAllZeros(Z.getNode()))
    return SDValue();

  // Rebuild the .wv op with Y as passthru and the merge condition as mask:
  // lanes where cond is false keep Y, matching merge-with-zero semantics.
  return DAG.getNode(Opc, SDLoc(N), N->getValueType(0),
                     {Y, X, Y, MergeOp->getOperand(0), N->getOperand(4)},
                     N->getFlags());
}
15408
                                          const RISCVSubtarget &Subtarget) {
  [[maybe_unused]] unsigned Opc = N->getOpcode();

  // First try to fold the implicit extension into a full widening op.
  if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
    return V;

  // Otherwise try folding a merge-with-zero operand into the mask.
  return combineVWADDSUBWSelect(N, DCI.DAG);
}
15421
// Helper function for performMemPairCombine.
// Try to combine the memory loads/stores LSNode1 and LSNode2
// into a single memory pair operation.
                                 LSBaseSDNode *LSNode2, SDValue BasePtr,
                                 uint64_t Imm) {
  SmallVector<const SDNode *, 8> Worklist = {LSNode1, LSNode2};

  // Bail if one node is reachable from the other; merging them would
  // create a cycle in the DAG.
  if (SDNode::hasPredecessorHelper(LSNode1, Visited, Worklist) ||
      SDNode::hasPredecessorHelper(LSNode2, Visited, Worklist))
    return SDValue();

  const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();

  // The new operation has twice the width.
  MVT XLenVT = Subtarget.getXLenVT();
  EVT MemVT = LSNode1->getMemoryVT();
  EVT NewMemVT = (MemVT == MVT::i32) ? MVT::i64 : MVT::i128;
  MachineMemOperand *MMO = LSNode1->getMemOperand();
      MMO, MMO->getPointerInfo(), MemVT == MVT::i32 ? 8 : 16);

  if (LSNode1->getOpcode() == ISD::LOAD) {
    auto Ext = cast<LoadSDNode>(LSNode1)->getExtensionType();
    unsigned Opcode;
    // th.lwud zero-extends, th.lwd sign-extends the two i32 results.
    if (MemVT == MVT::i32)
      Opcode = (Ext == ISD::ZEXTLOAD) ? RISCVISD::TH_LWUD : RISCVISD::TH_LWD;
    else
      Opcode = RISCVISD::TH_LDD;

    SDValue Res = DAG.getMemIntrinsicNode(
        Opcode, SDLoc(LSNode1), DAG.getVTList({XLenVT, XLenVT, MVT::Other}),
        {LSNode1->getChain(), BasePtr,
         DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
        NewMemVT, NewMMO);

    // Re-expose the paired results as {value, chain} for each original load.
    SDValue Node1 =
        DAG.getMergeValues({Res.getValue(0), Res.getValue(2)}, SDLoc(LSNode1));
    SDValue Node2 =
        DAG.getMergeValues({Res.getValue(1), Res.getValue(2)}, SDLoc(LSNode2));

    DAG.ReplaceAllUsesWith(LSNode2, Node2.getNode());
    return Node1;
  } else {
    unsigned Opcode = (MemVT == MVT::i32) ? RISCVISD::TH_SWD : RISCVISD::TH_SDD;

    SDValue Res = DAG.getMemIntrinsicNode(
        Opcode, SDLoc(LSNode1), DAG.getVTList(MVT::Other),
        {LSNode1->getChain(), LSNode1->getOperand(1), LSNode2->getOperand(1),
         BasePtr, DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
        NewMemVT, NewMMO);

    DAG.ReplaceAllUsesWith(LSNode2, Res.getNode());
    return Res;
  }
}
15480
// Try to combine two adjacent loads/stores to a single pair instruction from
// the XTHeadMemPair vendor extension.
  SelectionDAG &DAG = DCI.DAG;
  const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();

  // Target does not support load/store pair.
  if (!Subtarget.hasVendorXTHeadMemPair())
    return SDValue();

  LSBaseSDNode *LSNode1 = cast<LSBaseSDNode>(N);
  EVT MemVT = LSNode1->getMemoryVT();
  // Pointer operand index: 1 for loads, 2 for stores.
  unsigned OpNum = LSNode1->getOpcode() == ISD::LOAD ? 1 : 2;

  // No volatile, indexed or atomic loads/stores.
  if (!LSNode1->isSimple() || LSNode1->isIndexed())
    return SDValue();

  // Function to get a base + constant representation from a memory value.
  auto ExtractBaseAndOffset = [](SDValue Ptr) -> std::pair<SDValue, uint64_t> {
    if (Ptr->getOpcode() == ISD::ADD)
      if (auto *C1 = dyn_cast<ConstantSDNode>(Ptr->getOperand(1)))
        return {Ptr->getOperand(0), C1->getZExtValue()};
    return {Ptr, 0};
  };

  auto [Base1, Offset1] = ExtractBaseAndOffset(LSNode1->getOperand(OpNum));

  // Scan other users of the same chain for a second, compatible memory op.
  SDValue Chain = N->getOperand(0);
  for (SDNode::use_iterator UI = Chain->use_begin(), UE = Chain->use_end();
       UI != UE; ++UI) {
    SDUse &Use = UI.getUse();
    if (Use.getUser() != N && Use.getResNo() == 0 &&
        Use.getUser()->getOpcode() == N->getOpcode()) {

      // No volatile, indexed or atomic loads/stores.
      if (!LSNode2->isSimple() || LSNode2->isIndexed())
        continue;

      // Check if LSNode1 and LSNode2 have the same type and extension.
      if (LSNode1->getOpcode() == ISD::LOAD)
        if (cast<LoadSDNode>(LSNode2)->getExtensionType() !=
          continue;

      if (LSNode1->getMemoryVT() != LSNode2->getMemoryVT())
        continue;

      auto [Base2, Offset2] = ExtractBaseAndOffset(LSNode2->getOperand(OpNum));

      // Check if the base pointer is the same for both instructions.
      if (Base1 != Base2)
        continue;

      // Check if the offsets match the XTHeadMemPair encoding constraints.
      bool Valid = false;
      if (MemVT == MVT::i32) {
        // Check for adjacent i32 values and a 2-bit index.
        if ((Offset1 + 4 == Offset2) && isShiftedUInt<2, 3>(Offset1))
          Valid = true;
      } else if (MemVT == MVT::i64) {
        // Check for adjacent i64 values and a 2-bit index.
        if ((Offset1 + 8 == Offset2) && isShiftedUInt<2, 4>(Offset1))
          Valid = true;
      }

      if (!Valid)
        continue;

      // Try to combine.
      if (SDValue Res =
              tryMemPairCombine(DAG, LSNode1, LSNode2, Base1, Offset1))
        return Res;
    }
  }

  return SDValue();
}
15562
// Fold
// (fp_to_int (froundeven X)) -> fcvt X, rne
// (fp_to_int (ftrunc X)) -> fcvt X, rtz
// (fp_to_int (ffloor X)) -> fcvt X, rdn
// (fp_to_int (fceil X)) -> fcvt X, rup
// (fp_to_int (fround X)) -> fcvt X, rmm
// (fp_to_int (frint X)) -> fcvt X
                                       const RISCVSubtarget &Subtarget) {
  SelectionDAG &DAG = DCI.DAG;
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  MVT XLenVT = Subtarget.getXLenVT();

  SDValue Src = N->getOperand(0);

  // Don't do this for strict-fp Src.
  if (Src->isStrictFPOpcode() || Src->isTargetStrictFPOpcode())
    return SDValue();

  // Ensure the FP type is legal.
  if (!TLI.isTypeLegal(Src.getValueType()))
    return SDValue();

  // Don't do this for f16 with Zfhmin and not Zfh.
  if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
    return SDValue();

  // Map the rounding node (ftrunc, ffloor, ...) to the fcvt rounding mode.
  RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
  // If the result is invalid, we didn't find a foldable instruction.
  if (FRM == RISCVFPRndMode::Invalid)
    return SDValue();

  SDLoc DL(N);
  bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
  EVT VT = N->getValueType(0);

  if (VT.isVector() && TLI.isTypeLegal(VT)) {
    MVT SrcVT = Src.getSimpleValueType();
    MVT SrcContainerVT = SrcVT;
    MVT ContainerVT = VT.getSimpleVT();
    SDValue XVal = Src.getOperand(0);

    // For widening and narrowing conversions we just combine it into a
    // VFCVT_..._VL node, as there are no specific VFWCVT/VFNCVT VL nodes. They
    // end up getting lowered to their appropriate pseudo instructions based on
    // their operand types
    if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits() * 2 ||
        VT.getScalarSizeInBits() * 2 < SrcVT.getScalarSizeInBits())
      return SDValue();

    // Make fixed-length vectors scalable first
    if (SrcVT.isFixedLengthVector()) {
      SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
      XVal = convertToScalableVector(SrcContainerVT, XVal, DAG, Subtarget);
      ContainerVT =
          getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget);
    }

    auto [Mask, VL] =
        getDefaultVLOps(SrcVT, SrcContainerVT, DL, DAG, Subtarget);

    SDValue FpToInt;
    if (FRM == RISCVFPRndMode::RTZ) {
      // Use the dedicated trunc static rounding mode if we're truncating so we
      // don't need to generate calls to fsrmi/fsrm
      unsigned Opc =
      FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask, VL);
    } else if (FRM == RISCVFPRndMode::DYN) {
      // frint uses the dynamic (current) rounding mode, no FRM operand.
      unsigned Opc =
      FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask, VL);
    } else {
      // All other modes pass FRM as an explicit operand.
      unsigned Opc =
      FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask,
                            DAG.getTargetConstant(FRM, DL, XLenVT), VL);
    }

    // If converted from fixed-length to scalable, convert back
    if (VT.isFixedLengthVector())
      FpToInt = convertFromScalableVector(VT, FpToInt, DAG, Subtarget);

    return FpToInt;
  }

  // Only handle XLen or i32 types. Other types narrower than XLen will
  // eventually be legalized to XLenVT.
  if (VT != MVT::i32 && VT != XLenVT)
    return SDValue();

  unsigned Opc;
  if (VT == XLenVT)
    Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
  else

  // The scalar fcvt always produces an XLenVT result; truncate to VT.
  SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src.getOperand(0),
                                DAG.getTargetConstant(FRM, DL, XLenVT));
  return DAG.getNode(ISD::TRUNCATE, DL, VT, FpToInt);
}
15665
 15666 // Fold
 15667 // (fp_to_int_sat (froundeven X)) -> (select X == nan, 0, (fcvt X, rne))
 15668 // (fp_to_int_sat (ftrunc X)) -> (select X == nan, 0, (fcvt X, rtz))
 15669 // (fp_to_int_sat (ffloor X)) -> (select X == nan, 0, (fcvt X, rdn))
 15670 // (fp_to_int_sat (fceil X)) -> (select X == nan, 0, (fcvt X, rup))
 15671 // (fp_to_int_sat (fround X)) -> (select X == nan, 0, (fcvt X, rmm))
 15672 // (fp_to_int_sat (frint X)) -> (select X == nan, 0, (fcvt X, dyn))
 // NOTE(review): this is a doxygen-rendered listing; the extraction dropped the
 // hyperlinked source lines (15673-15674 here — presumably the function's
 // "static SDValue ...(SDNode *N, DAGCombinerInfo &DCI," signature — and 15712
 // below). It is documentation, not compilable source.
 15675 const RISCVSubtarget &Subtarget) {
 15676 SelectionDAG &DAG = DCI.DAG;
 15677 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
 15678 MVT XLenVT = Subtarget.getXLenVT();
 15679
 15680 // Only handle XLen types. Other types narrower than XLen will eventually be
 15681 // legalized to XLenVT.
 15682 EVT DstVT = N->getValueType(0);
 15683 if (DstVT != XLenVT)
 15684 return SDValue();
 15685
 15686 SDValue Src = N->getOperand(0);
 15687
 15688 // Don't do this for strict-fp Src.
 15689 if (Src->isStrictFPOpcode() || Src->isTargetStrictFPOpcode())
 15690 return SDValue();
 15691
 15692 // Ensure the FP type is also legal.
 15693 if (!TLI.isTypeLegal(Src.getValueType()))
 15694 return SDValue();
 15695
 15696 // Don't do this for f16 with Zfhmin and not Zfh.
 15697 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
 15698 return SDValue();
 15699
 15700 EVT SatVT = cast<VTSDNode>(N->getOperand(1))->getVT();
 15701
 15702 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
 // If matchRoundingOp did not recognize the rounding op, there is nothing to fold.
 15703 if (FRM == RISCVFPRndMode::Invalid)
 15704 return SDValue();
 15705
 15706 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT_SAT;
 15707
 15708 unsigned Opc;
 15709 if (SatVT == DstVT)
 15710 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
 15711 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
 // NOTE(review): dropped line 15712 — presumably selects the RV64 W-form
 // opcodes (the FCVT_WU_RV64 check at 15725 depends on it); confirm upstream.
 15713 else
 15714 return SDValue();
 15715 // FIXME: Support other SatVTs by clamping before or after the conversion.
 15716
 15717 Src = Src.getOperand(0);
 15718
 15719 SDLoc DL(N);
 15720 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src,
 15721 DAG.getTargetConstant(FRM, DL, XLenVT));
 15722
 15723 // fcvt.wu.* sign extends bit 31 on RV64. FP_TO_UINT_SAT expects to zero
 15724 // extend.
 15725 if (Opc == RISCVISD::FCVT_WU_RV64)
 15726 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
 15727
 15728 // RISC-V FP-to-int conversions saturate to the destination register size, but
 15729 // don't produce 0 for nan.
 // The SETUO (unordered) compare of Src with itself is true exactly when Src is
 // NaN, so the select yields 0 for NaN and the converted value otherwise.
 15730 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
 15731 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO);
 15732 }
15733
 15734 // Combine (bitreverse (bswap X)) to the BREV8 GREVI encoding if the type is
 15735 // smaller than XLenVT.
 // NOTE(review): doxygen-rendered listing; the extraction dropped hyperlinked
 // lines (15736 — presumably the function signature — and 15746, the rest of
 // the legality condition below). Not compilable as-is; confirm upstream.
 15737 const RISCVSubtarget &Subtarget) {
 // BREV8 comes from Zbkb; caller must have checked the extension already.
 15738 assert(Subtarget.hasStdExtZbkb() && "Unexpected extension");
 15739
 15740 SDValue Src = N->getOperand(0);
 15741 if (Src.getOpcode() != ISD::BSWAP)
 15742 return SDValue();
 15743
 15744 EVT VT = N->getValueType(0);
 15745 if (!VT.isScalarInteger() || VT.getSizeInBits() >= Subtarget.getXLen() ||
 15747 return SDValue();
 15748
 // bitreverse(bswap X) reverses bits within each byte, which is exactly BREV8.
 15749 SDLoc DL(N);
 15750 return DAG.getNode(RISCVISD::BREV8, DL, VT, Src.getOperand(0));
 15751 }
15752
 15753 // Convert from one FMA opcode to another based on whether we are negating the
 15754 // multiply result and/or the accumulator.
 15755 // NOTE: Only supports RVV operations with VL.
 // NOTE(review): doxygen-rendered listing; the extraction dropped hyperlinked
 // lines 15765-15769 and 15782-15786 inside the two switches — presumably the
 // STRICT_* FMA variants mirroring the cases shown. Confirm against upstream.
 15756 static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc) {
 15757 // Negating the multiply result changes ADD<->SUB and toggles 'N'.
 15758 if (NegMul) {
 15759 // clang-format off
 15760 switch (Opcode) {
 15761 default: llvm_unreachable("Unexpected opcode");
 15762 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
 15763 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
 15764 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
 15765 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
 15770 }
 15771 // clang-format on
 15772 }
 15773
 15774 // Negating the accumulator changes ADD<->SUB.
 15775 if (NegAcc) {
 15776 // clang-format off
 15777 switch (Opcode) {
 15778 default: llvm_unreachable("Unexpected opcode");
 15779 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
 15780 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
 15781 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
 15782 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
 15787 }
 15788 // clang-format on
 15789 }
 15790
 15791 return Opcode;
 15792 }
15793
 // NOTE(review): doxygen-rendered listing; the extraction dropped line 15794 —
 // presumably this combine's "static SDValue ...(SDNode *N, SelectionDAG &DAG)"
 // signature. Not compilable as-is.
 15795 // Fold FNEG_VL into FMA opcodes.
 15796 // The first operand of strict-fp is chain.
 15797 unsigned Offset = N->isTargetStrictFPOpcode();
 15798 SDValue A = N->getOperand(0 + Offset);
 15799 SDValue B = N->getOperand(1 + Offset);
 15800 SDValue C = N->getOperand(2 + Offset);
 15801 SDValue Mask = N->getOperand(3 + Offset);
 15802 SDValue VL = N->getOperand(4 + Offset);
 15803
 // Strip an FNEG_VL only when it uses the same mask and VL as the FMA,
 // otherwise folding the negation would change which lanes are affected.
 15804 auto invertIfNegative = [&Mask, &VL](SDValue &V) {
 15805 if (V.getOpcode() == RISCVISD::FNEG_VL && V.getOperand(1) == Mask &&
 15806 V.getOperand(2) == VL) {
 15807 // Return the negated input.
 15808 V = V.getOperand(0);
 15809 return true;
 15810 }
 15811
 15812 return false;
 15813 };
 15814
 15815 bool NegA = invertIfNegative(A);
 15816 bool NegB = invertIfNegative(B);
 15817 bool NegC = invertIfNegative(C);
 15818
 15819 // If no operands are negated, we're done.
 15820 if (!NegA && !NegB && !NegC)
 15821 return SDValue();
 15822
 // Negating either multiplicand negates the product, hence NegA != NegB.
 15823 unsigned NewOpcode = negateFMAOpcode(N->getOpcode(), NegA != NegB, NegC);
 15824 if (N->isTargetStrictFPOpcode())
 15825 return DAG.getNode(NewOpcode, SDLoc(N), N->getVTList(),
 15826 {N->getOperand(0), A, B, C, Mask, VL});
 15827 return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), A, B, C, Mask,
 15828 VL);
 15829 }
15830
 // NOTE(review): doxygen-rendered listing; the extraction dropped lines
 // 15831-15832 (presumably this combine's signature) and 15836 (presumably the
 // "if (SDValue V = ...)" guard whose result is returned below). Confirm upstream.
 15833 const RISCVSubtarget &Subtarget) {
 15834 SelectionDAG &DAG = DCI.DAG;
 15835
 15837 return V;
 15838
 15839 // FIXME: Ignore strict opcodes for now.
 15840 if (N->isTargetStrictFPOpcode())
 15841 return SDValue();
 15842
 // Otherwise try to narrow/widen the FMA into a VW (widening) operation.
 15843 return combineOp_VLToVWOp_VL(N, DCI, Subtarget);
 15844 }
15845
 // NOTE(review): doxygen-rendered listing; the extraction dropped lines 15846
 // (presumably this combine's signature) and 15868 (presumably the
 // isa<ConstantSDNode> check on the inner SHL amount). Not compilable as-is.
 15847 const RISCVSubtarget &Subtarget) {
 15848 assert(N->getOpcode() == ISD::SRA && "Unexpected opcode");
 15849
 15850 EVT VT = N->getValueType(0);
 15851
 // Only XLen-typed shifts are interesting; narrower types legalize to XLenVT.
 15852 if (VT != Subtarget.getXLenVT())
 15853 return SDValue();
 15854
 15855 if (!isa<ConstantSDNode>(N->getOperand(1)))
 15856 return SDValue();
 15857 uint64_t ShAmt = N->getConstantOperandVal(1);
 15858
 15859 SDValue N0 = N->getOperand(0);
 15860
 15861 // Combine (sra (sext_inreg (shl X, C1), iX), C2) ->
 15862 // (sra (shl X, C1+(XLen-iX)), C2+(XLen-iX)) so it gets selected as SLLI+SRAI.
 15863 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG && N0.hasOneUse()) {
 15864 unsigned ExtSize =
 15865 cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
 15866 if (ShAmt < ExtSize && N0.getOperand(0).getOpcode() == ISD::SHL &&
 15867 N0.getOperand(0).hasOneUse() &&
 15869 uint64_t LShAmt = N0.getOperand(0).getConstantOperandVal(1);
 15870 if (LShAmt < ExtSize) {
 15871 unsigned Size = VT.getSizeInBits();
 15872 SDLoc ShlDL(N0.getOperand(0));
 15873 SDValue Shl =
 15874 DAG.getNode(ISD::SHL, ShlDL, VT, N0.getOperand(0).getOperand(0),
 15875 DAG.getConstant(LShAmt + (Size - ExtSize), ShlDL, VT));
 15876 SDLoc DL(N);
 15877 return DAG.getNode(ISD::SRA, DL, VT, Shl,
 15878 DAG.getConstant(ShAmt + (Size - ExtSize), DL, VT));
 15879 }
 15880 }
 15881 }
 15882
 // The remaining folds only apply to RV64 with a shift amount <= 32.
 15883 if (ShAmt > 32 || VT != MVT::i64)
 15884 return SDValue();
 15885
 15886 // Combine (sra (shl X, 32), 32 - C) -> (shl (sext_inreg X, i32), C)
 15887 // FIXME: Should this be a generic combine? There's a similar combine on X86.
 15888 //
 15889 // Also try these folds where an add or sub is in the middle.
 15890 // (sra (add (shl X, 32), C1), 32 - C) -> (shl (sext_inreg (add X, C1), C)
 15891 // (sra (sub C1, (shl X, 32)), 32 - C) -> (shl (sext_inreg (sub C1, X), C)
 15892 SDValue Shl;
 15893 ConstantSDNode *AddC = nullptr;
 15894
 15895 // We might have an ADD or SUB between the SRA and SHL.
 15896 bool IsAdd = N0.getOpcode() == ISD::ADD;
 15897 if ((IsAdd || N0.getOpcode() == ISD::SUB)) {
 15898 // Other operand needs to be a constant we can modify.
 15899 AddC = dyn_cast<ConstantSDNode>(N0.getOperand(IsAdd ? 1 : 0));
 15900 if (!AddC)
 15901 return SDValue();
 15902
 15903 // AddC needs to have at least 32 trailing zeros.
 15904 if (llvm::countr_zero(AddC->getZExtValue()) < 32)
 15905 return SDValue();
 15906
 15907 // All users should be a shift by constant less than or equal to 32. This
 15908 // ensures we'll do this optimization for each of them to produce an
 15909 // add/sub+sext_inreg they can all share.
 15910 for (SDNode *U : N0->uses()) {
 15911 if (U->getOpcode() != ISD::SRA ||
 15912 !isa<ConstantSDNode>(U->getOperand(1)) ||
 15913 U->getConstantOperandVal(1) > 32)
 15914 return SDValue();
 15915 }
 15916
 15917 Shl = N0.getOperand(IsAdd ? 0 : 1);
 15918 } else {
 15919 // Not an ADD or SUB.
 15920 Shl = N0;
 15921 }
 15922
 15923 // Look for a shift left by 32.
 15924 if (Shl.getOpcode() != ISD::SHL || !isa<ConstantSDNode>(Shl.getOperand(1)) ||
 15925 Shl.getConstantOperandVal(1) != 32)
 15926 return SDValue();
 15927
 15928 // If we didn't look through an add/sub, then the shl should have one use.
 15929 // If we did look through an add/sub, the sext_inreg we create is free so
 15930 // we're only creating 2 new instructions. It's enough to only remove the
 15931 // original sra+add/sub.
 15932 if (!AddC && !Shl.hasOneUse())
 15933 return SDValue();
 15934
 15935 SDLoc DL(N);
 15936 SDValue In = Shl.getOperand(0);
 15937
 15938 // If we looked through an ADD or SUB, we need to rebuild it with the shifted
 15939 // constant.
 15940 if (AddC) {
 15941 SDValue ShiftedAddC =
 15942 DAG.getConstant(AddC->getZExtValue() >> 32, DL, MVT::i64);
 15943 if (IsAdd)
 15944 In = DAG.getNode(ISD::ADD, DL, MVT::i64, In, ShiftedAddC);
 15945 else
 15946 In = DAG.getNode(ISD::SUB, DL, MVT::i64, ShiftedAddC, In);
 15947 }
 15948
 15949 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, In,
 15950 DAG.getValueType(MVT::i32));
 // ShAmt == 32 means (sra (shl X, 32), 32), i.e. a plain sext_inreg.
 15951 if (ShAmt == 32)
 15952 return SExt;
 15953
 15954 return DAG.getNode(
 15955 ISD::SHL, DL, MVT::i64, SExt,
 15956 DAG.getConstant(32 - ShAmt, DL, MVT::i64));
 15957 }
15958
 15959 // Invert (and/or (set cc X, Y), (xor Z, 1)) to (or/and (set !cc X, Y)), Z) if
 15960 // the result is used as the condition of a br_cc or select_cc we can invert,
 15961 // inverting the setcc is free, and Z is 0/1. Caller will invert the
 15962 // br_cc/select_cc.
 // NOTE(review): doxygen-rendered listing; the extraction dropped lines 15963
 // (presumably the function signature), 15972 (presumably
 // "SDValue Xor = Cond.getOperand(1);" — Xor is used below without a visible
 // definition) and 15991 (presumably the 0/1 demanded-bits Mask). Confirm upstream.
 15964 bool IsAnd = Cond.getOpcode() == ISD::AND;
 15965 if (!IsAnd && Cond.getOpcode() != ISD::OR)
 15966 return SDValue();
 15967
 15968 if (!Cond.hasOneUse())
 15969 return SDValue();
 15970
 15971 SDValue Setcc = Cond.getOperand(0);
 15973 // Canonicalize setcc to LHS.
 15974 if (Setcc.getOpcode() != ISD::SETCC)
 15975 std::swap(Setcc, Xor);
 15976 // LHS should be a setcc and RHS should be an xor.
 15977 if (Setcc.getOpcode() != ISD::SETCC || !Setcc.hasOneUse() ||
 15978 Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
 15979 return SDValue();
 15980
 15981 // If the condition is an And, SimplifyDemandedBits may have changed
 15982 // (xor Z, 1) to (not Z).
 15983 SDValue Xor1 = Xor.getOperand(1);
 15984 if (!isOneConstant(Xor1) && !(IsAnd && isAllOnesConstant(Xor1)))
 15985 return SDValue();
 15986
 15987 EVT VT = Cond.getValueType();
 15988 SDValue Xor0 = Xor.getOperand(0);
 15989
 15990 // The LHS of the xor needs to be 0/1.
 15992 if (!DAG.MaskedValueIsZero(Xor0, Mask))
 15993 return SDValue();
 15994
 15995 // We can only invert integer setccs.
 15996 EVT SetCCOpVT = Setcc.getOperand(0).getValueType();
 15997 if (!SetCCOpVT.isScalarInteger())
 15998 return SDValue();
 15999
 16000 ISD::CondCode CCVal = cast<CondCodeSDNode>(Setcc.getOperand(2))->get();
 16001 if (ISD::isIntEqualitySetCC(CCVal)) {
 // Equality setccs invert for free (eq <-> ne).
 16002 CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
 16003 Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(0),
 16004 Setcc.getOperand(1), CCVal);
 16005 } else if (CCVal == ISD::SETLT && isNullConstant(Setcc.getOperand(0))) {
 16006 // Invert (setlt 0, X) by converting to (setlt X, 1).
 16007 Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(1),
 16008 DAG.getConstant(1, SDLoc(Setcc), VT), CCVal);
 16009 } else if (CCVal == ISD::SETLT && isOneConstant(Setcc.getOperand(1))) {
 16010 // (setlt X, 1) by converting to (setlt 0, X).
 16011 Setcc = DAG.getSetCC(SDLoc(Setcc), VT,
 16012 DAG.getConstant(0, SDLoc(Setcc), VT),
 16013 Setcc.getOperand(0), CCVal);
 16014 } else
 16015 return SDValue();
 16016
 // De Morgan: we inverted the setcc, so swap AND<->OR; caller inverts the user.
 16017 unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
 16018 return DAG.getNode(Opc, SDLoc(Cond), VT, Setcc, Xor.getOperand(0));
 16019 }
16020
 16021 // Perform common combines for BR_CC and SELECT_CC conditions.
 // Mutates LHS/RHS/CC in place and returns true if any fold applied, so the
 // caller can rebuild the br_cc/select_cc with the simplified condition.
 16022 static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
 16023 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
 16024 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
 16025
 16026 // Since an arithmetic right shift preserves the sign bit, the
 16027 // shift can be omitted for sign tests.
 16028 // Fold setlt (sra X, N), 0 -> setlt X, 0 and
 16029 // setge (sra X, N), 0 -> setge X, 0
 16030 if (isNullConstant(RHS) && (CCVal == ISD::SETGE || CCVal == ISD::SETLT) &&
 16031 LHS.getOpcode() == ISD::SRA) {
 16032 LHS = LHS.getOperand(0);
 16033 return true;
 16034 }
 16035
 // Everything below only applies to eq/ne comparisons.
 16036 if (!ISD::isIntEqualitySetCC(CCVal))
 16037 return false;
 16038
 16039 // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
 16040 // Sometimes the setcc is introduced after br_cc/select_cc has been formed.
 16041 if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
 16042 LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) {
 16043 // If we're looking for eq 0 instead of ne 0, we need to invert the
 16044 // condition.
 16045 bool Invert = CCVal == ISD::SETEQ;
 16046 CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
 16047 if (Invert)
 16048 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
 16049
 16050 RHS = LHS.getOperand(1);
 16051 LHS = LHS.getOperand(0);
 16052 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
 16053
 16054 CC = DAG.getCondCode(CCVal);
 16055 return true;
 16056 }
 16057
 16058 // Fold ((xor X, Y), 0, eq/ne) -> (X, Y, eq/ne)
 16059 if (LHS.getOpcode() == ISD::XOR && isNullConstant(RHS)) {
 16060 RHS = LHS.getOperand(1);
 16061 LHS = LHS.getOperand(0);
 16062 return true;
 16063 }
 16064
 16065 // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, XLen-1-C), 0, ge/lt)
 16066 if (isNullConstant(RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() &&
 16067 LHS.getOperand(1).getOpcode() == ISD::Constant) {
 16068 SDValue LHS0 = LHS.getOperand(0);
 16069 if (LHS0.getOpcode() == ISD::AND &&
 16070 LHS0.getOperand(1).getOpcode() == ISD::Constant) {
 16071 uint64_t Mask = LHS0.getConstantOperandVal(1);
 16072 uint64_t ShAmt = LHS.getConstantOperandVal(1);
 // Only a single-bit mask shifted down to bit 0 can become a sign-bit test.
 16073 if (isPowerOf2_64(Mask) && Log2_64(Mask) == ShAmt) {
 16074 CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
 16075 CC = DAG.getCondCode(CCVal);
 16076
 // Move the tested bit into the sign position.
 16077 ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt;
 16078 LHS = LHS0.getOperand(0);
 16079 if (ShAmt != 0)
 16080 LHS =
 16081 DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS0.getOperand(0),
 16082 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
 16083 return true;
 16084 }
 16085 }
 16086 }
 16087
 16088 // (X, 1, setne) -> // (X, 0, seteq) if we can prove X is 0/1.
 16089 // This can occur when legalizing some floating point comparisons.
 16090 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
 16091 if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
 16092 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
 16093 CC = DAG.getCondCode(CCVal);
 16094 RHS = DAG.getConstant(0, DL, LHS.getValueType());
 16095 return true;
 16096 }
 16097
 // Last resort: try the De Morgan rewrite of a boolean and/or condition.
 16098 if (isNullConstant(RHS)) {
 16099 if (SDValue NewCond = tryDemorganOfBooleanCondition(LHS, DAG)) {
 16100 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
 16101 CC = DAG.getCondCode(CCVal);
 16102 LHS = NewCond;
 16103 return true;
 16104 }
 16105 }
 16106
 16107 return false;
 16108 }
16109
 16110 // Fold
 16111 // (select C, (add Y, X), Y) -> (add Y, (select C, X, 0)).
 16112 // (select C, (sub Y, X), Y) -> (sub Y, (select C, X, 0)).
 16113 // (select C, (or Y, X), Y) -> (or Y, (select C, X, 0)).
 16114 // (select C, (xor Y, X), Y) -> (xor Y, (select C, X, 0)).
 // NOTE(review): doxygen-rendered listing; the extraction dropped line 16115 —
 // presumably the "static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG
 // &DAG," signature. Not compilable as-is.
 16116 SDValue TrueVal, SDValue FalseVal,
 16117 bool Swapped) {
 // Shifts and SUB are not commutative, so the folded operand must be operand 0.
 16118 bool Commutative = true;
 16119 unsigned Opc = TrueVal.getOpcode();
 16120 switch (Opc) {
 16121 default:
 16122 return SDValue();
 16123 case ISD::SHL:
 16124 case ISD::SRA:
 16125 case ISD::SRL:
 16126 case ISD::SUB:
 16127 Commutative = false;
 16128 break;
 16129 case ISD::ADD:
 16130 case ISD::OR:
 16131 case ISD::XOR:
 16132 break;
 16133 }
 16134
 // A constant FalseVal is cheap to select already; don't pessimize it.
 16135 if (!TrueVal.hasOneUse() || isa<ConstantSDNode>(FalseVal))
 16136 return SDValue();
 16137
 16138 unsigned OpToFold;
 16139 if (FalseVal == TrueVal.getOperand(0))
 16140 OpToFold = 0;
 16141 else if (Commutative && FalseVal == TrueVal.getOperand(1))
 16142 OpToFold = 1;
 16143 else
 16144 return SDValue();
 16145
 16146 EVT VT = N->getValueType(0);
 16147 SDLoc DL(N);
 16148 SDValue OtherOp = TrueVal.getOperand(1 - OpToFold);
 16149 EVT OtherOpVT = OtherOp.getValueType();
 // The identity element makes the "not taken" arm a no-op (x op identity == x).
 16150 SDValue IdentityOperand =
 16151 DAG.getNeutralElement(Opc, DL, OtherOpVT, N->getFlags());
 // For the non-commutative ops above (shifts, sub as Y - X), 0 is the identity
 // for the second operand.
 16152 if (!Commutative)
 16153 IdentityOperand = DAG.getConstant(0, DL, OtherOpVT);
 16154 assert(IdentityOperand && "No identity operand!");
 16155
 16156 if (Swapped)
 16157 std::swap(OtherOp, IdentityOperand);
 16158 SDValue NewSel =
 16159 DAG.getSelect(DL, OtherOpVT, N->getOperand(0), OtherOp, IdentityOperand);
 16160 return DAG.getNode(TrueVal.getOpcode(), DL, VT, FalseVal, NewSel);
 16161 }
16162
 16163 // This tries to get rid of `select` and `icmp` that are being used to handle
 16164 // `Targets` that do not support `cttz(0)`/`ctlz(0)`.
 // NOTE(review): doxygen-rendered listing; the extraction dropped line 16165 —
 // presumably this fold's "static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N,
 // SelectionDAG &DAG)" signature. Not compilable as-is.
 16166 SDValue Cond = N->getOperand(0);
 16167
 16168 // This represents either CTTZ or CTLZ instruction.
 16169 SDValue CountZeroes;
 16170
 16171 SDValue ValOnZero;
 16172
 16173 if (Cond.getOpcode() != ISD::SETCC)
 16174 return SDValue();
 16175
 16176 if (!isNullConstant(Cond->getOperand(1)))
 16177 return SDValue();
 16178
 // Work out which select arm is the count and which is the zero-input value.
 16179 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond->getOperand(2))->get();
 16180 if (CCVal == ISD::CondCode::SETEQ) {
 16181 CountZeroes = N->getOperand(2);
 16182 ValOnZero = N->getOperand(1);
 16183 } else if (CCVal == ISD::CondCode::SETNE) {
 16184 CountZeroes = N->getOperand(1);
 16185 ValOnZero = N->getOperand(2);
 16186 } else {
 16187 return SDValue();
 16188 }
 16189
 // Look through a truncate/zext wrapper around the count.
 16190 if (CountZeroes.getOpcode() == ISD::TRUNCATE ||
 16191 CountZeroes.getOpcode() == ISD::ZERO_EXTEND)
 16192 CountZeroes = CountZeroes.getOperand(0);
 16193
 16194 if (CountZeroes.getOpcode() != ISD::CTTZ &&
 16195 CountZeroes.getOpcode() != ISD::CTTZ_ZERO_UNDEF &&
 16196 CountZeroes.getOpcode() != ISD::CTLZ &&
 16197 CountZeroes.getOpcode() != ISD::CTLZ_ZERO_UNDEF)
 16198 return SDValue();
 16199
 // The fold only works when the zero case selects 0.
 16200 if (!isNullConstant(ValOnZero))
 16201 return SDValue();
 16202
 // The compared value and the counted value must be the same.
 16203 SDValue CountZeroesArgument = CountZeroes->getOperand(0);
 16204 if (Cond->getOperand(0) != CountZeroesArgument)
 16205 return SDValue();
 16206
 // Replace the *_ZERO_UNDEF forms with the defined-at-zero forms since the
 // zero input is no longer guarded by the select.
 16207 if (CountZeroes.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
 16208 CountZeroes = DAG.getNode(ISD::CTTZ, SDLoc(CountZeroes),
 16209 CountZeroes.getValueType(), CountZeroesArgument);
 16210 } else if (CountZeroes.getOpcode() == ISD::CTLZ_ZERO_UNDEF) {
 16211 CountZeroes = DAG.getNode(ISD::CTLZ, SDLoc(CountZeroes),
 16212 CountZeroes.getValueType(), CountZeroesArgument);
 16213 }
 16214
 // Masking with BitWidth-1 maps the count-of-zero-input result (BitWidth) to 0,
 // matching the select's zero arm, and is a no-op for all other results.
 16215 unsigned BitWidth = CountZeroes.getValueSizeInBits();
 16216 SDValue BitWidthMinusOne =
 16217 DAG.getConstant(BitWidth - 1, SDLoc(N), CountZeroes.getValueType());
 16218
 16219 auto AndNode = DAG.getNode(ISD::AND, SDLoc(N), CountZeroes.getValueType(),
 16220 CountZeroes, BitWidthMinusOne);
 16221 return DAG.getZExtOrTrunc(AndNode, SDLoc(N), N->getValueType(0));
 16222 }
16223
 // NOTE(review): doxygen-rendered listing; the extraction dropped line 16224 —
 // presumably this fold's "static SDValue useInversedSetcc(SDNode *N,
 // SelectionDAG &DAG," signature. Not compilable as-is.
 16225 const RISCVSubtarget &Subtarget) {
 16226 SDValue Cond = N->getOperand(0);
 16227 SDValue True = N->getOperand(1);
 16228 SDValue False = N->getOperand(2);
 16229 SDLoc DL(N);
 16230 EVT VT = N->getValueType(0);
 16231 EVT CondVT = Cond.getValueType();
 16232
 16233 if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
 16234 return SDValue();
 16235
 16236 // Replace (setcc eq (and x, C)) with (setcc ne (and x, C))) to generate
 16237 // BEXTI, where C is power of 2.
 16238 if (Subtarget.hasStdExtZbs() && VT.isScalarInteger() &&
 16239 (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())) {
 16240 SDValue LHS = Cond.getOperand(0);
 16241 SDValue RHS = Cond.getOperand(1);
 16242 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
 16243 if (CC == ISD::SETEQ && LHS.getOpcode() == ISD::AND &&
 16244 isa<ConstantSDNode>(LHS.getOperand(1)) && isNullConstant(RHS)) {
 16245 const APInt &MaskVal = LHS.getConstantOperandAPInt(1);
 // Only worthwhile when the mask can't be encoded as a 12-bit immediate
 // (otherwise ANDI already handles it) and is a single bit (BEXTI-form).
 16246 if (MaskVal.isPowerOf2() && !MaskVal.isSignedIntN(12))
 // Invert the condition and swap the select arms to compensate.
 16247 return DAG.getSelect(DL, VT,
 16248 DAG.getSetCC(DL, CondVT, LHS, RHS, ISD::SETNE),
 16249 False, True);
 16250 }
 16251 }
 16252 return SDValue();
 16253 }
16254
 // NOTE(review): doxygen-rendered listing; the extraction dropped line 16255 —
 // presumably the "static SDValue performSELECTCombine(SDNode *N, SelectionDAG
 // &DAG," signature. Driver that tries the SELECT folds defined above in order.
 16256 const RISCVSubtarget &Subtarget) {
 16257 if (SDValue Folded = foldSelectOfCTTZOrCTLZ(N, DAG))
 16258 return Folded;
 16259
 16260 if (SDValue V = useInversedSetcc(N, DAG, Subtarget))
 16261 return V;
 16262
 // With conditional-move fusion the select itself is cheap; skip the
 // select-into-op rewrite below.
 16263 if (Subtarget.hasConditionalMoveFusion())
 16264 return SDValue();
 16265
 // Try folding the select into the true arm, then into the false arm.
 16266 SDValue TrueVal = N->getOperand(1);
 16267 SDValue FalseVal = N->getOperand(2);
 16268 if (SDValue V = tryFoldSelectIntoOp(N, DAG, TrueVal, FalseVal, /*Swapped*/false))
 16269 return V;
 16270 return tryFoldSelectIntoOp(N, DAG, FalseVal, TrueVal, /*Swapped*/true);
 16271 }
16272
 16273 /// If we have a build_vector where each lane is binop X, C, where C
 16274 /// is a constant (but not necessarily the same constant on all lanes),
 16275 /// form binop (build_vector x1, x2, ...), (build_vector c1, c2, c3, ..).
 16276 /// We assume that materializing a constant build vector will be no more
 16277 /// expensive than performing O(n) binops.
 // NOTE(review): doxygen-rendered listing; the extraction dropped line 16278 —
 // presumably the "static SDValue performBUILD_VECTORCombine(SDNode *N,
 // SelectionDAG &DAG," signature. Not compilable as-is.
 16279 const RISCVSubtarget &Subtarget,
 16280 const RISCVTargetLowering &TLI) {
 16281 SDLoc DL(N);
 16282 EVT VT = N->getValueType(0);
 16283
 16284 assert(!VT.isScalableVector() && "unexpected build vector");
 16285
 16286 if (VT.getVectorNumElements() == 1)
 16287 return SDValue();
 16288
 // Every lane must use the same binop opcode; probe the first lane.
 16289 const unsigned Opcode = N->op_begin()->getNode()->getOpcode();
 16290 if (!TLI.isBinOp(Opcode))
 16291 return SDValue();
 16292
 16293 if (!TLI.isOperationLegalOrCustom(Opcode, VT) || !TLI.isTypeLegal(VT))
 16294 return SDValue();
 16295
 16296 // This BUILD_VECTOR involves an implicit truncation, and sinking
 16297 // truncates through binops is non-trivial.
 16298 if (N->op_begin()->getValueType() != VT.getVectorElementType())
 16299 return SDValue();
 16300
 16301 SmallVector<SDValue> LHSOps;
 16302 SmallVector<SDValue> RHSOps;
 16303 for (SDValue Op : N->ops()) {
 16304 if (Op.isUndef()) {
 16305 // We can't form a divide or remainder from undef.
 16306 if (!DAG.isSafeToSpeculativelyExecute(Opcode))
 16307 return SDValue();
 16308
 16309 LHSOps.push_back(Op);
 16310 RHSOps.push_back(Op);
 16311 continue;
 16312 }
 16313
 16314 // TODO: We can handle operations which have a neutral rhs value
 16315 // (e.g. x + 0, a * 1 or a << 0), but we then have to keep track
 16316 // of profit in a more explicit manner.
 16317 if (Op.getOpcode() != Opcode || !Op.hasOneUse())
 16318 return SDValue();
 16319
 16320 LHSOps.push_back(Op.getOperand(0));
 // The per-lane RHS must be a constant so the RHS build_vector is materializable.
 16321 if (!isa<ConstantSDNode>(Op.getOperand(1)) &&
 16322 !isa<ConstantFPSDNode>(Op.getOperand(1)))
 16323 return SDValue();
 16324 // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
 16325 // have different LHS and RHS types.
 16326 if (Op.getOperand(0).getValueType() != Op.getOperand(1).getValueType())
 16327 return SDValue();
 16328
 16329 RHSOps.push_back(Op.getOperand(1));
 16330 }
 16331
 16332 return DAG.getNode(Opcode, DL, VT, DAG.getBuildVector(VT, DL, LHSOps),
 16333 DAG.getBuildVector(VT, DL, RHSOps));
 16334 }
16335
 // NOTE(review): doxygen-rendered listing; the extraction dropped lines 16336
 // (presumably the "static SDValue performINSERT_VECTOR_ELTCombine(...)"
 // signature), 16361-16362 (presumably blank + a constant check on InVecRHS),
 // and 16370/16372 (presumably the "SDValue LHS/RHS = DAG.getNode(
 // ISD::INSERT_VECTOR_ELT, ..." lines completed below). Confirm upstream.
 16337 const RISCVSubtarget &Subtarget,
 16338 const RISCVTargetLowering &TLI) {
 16339 SDValue InVec = N->getOperand(0);
 16340 SDValue InVal = N->getOperand(1);
 16341 SDValue EltNo = N->getOperand(2);
 16342 SDLoc DL(N);
 16343
 16344 EVT VT = InVec.getValueType();
 16345 if (VT.isScalableVector())
 16346 return SDValue();
 16347
 16348 if (!InVec.hasOneUse())
 16349 return SDValue();
 16350
 16351 // Given insert_vector_elt (binop a, VecC), (same_binop b, C2), Elt
 16352 // move the insert_vector_elts into the arms of the binop. Note that
 16353 // the new RHS must be a constant.
 16354 const unsigned InVecOpcode = InVec->getOpcode();
 16355 if (InVecOpcode == InVal->getOpcode() && TLI.isBinOp(InVecOpcode) &&
 16356 InVal.hasOneUse()) {
 16357 SDValue InVecLHS = InVec->getOperand(0);
 16358 SDValue InVecRHS = InVec->getOperand(1);
 16359 SDValue InValLHS = InVal->getOperand(0);
 16360 SDValue InValRHS = InVal->getOperand(1);
 16363 return SDValue();
 16364 if (!isa<ConstantSDNode>(InValRHS) && !isa<ConstantFPSDNode>(InValRHS))
 16365 return SDValue();
 16366 // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
 16367 // have different LHS and RHS types.
 16368 if (InVec.getOperand(0).getValueType() != InVec.getOperand(1).getValueType())
 16369 return SDValue();
 16371 InVecLHS, InValLHS, EltNo);
 16373 InVecRHS, InValRHS, EltNo);
 16374 return DAG.getNode(InVecOpcode, DL, VT, LHS, RHS);
 16375 }
 16376
 16377 // Given insert_vector_elt (concat_vectors ...), InVal, Elt
 16378 // move the insert_vector_elt to the source operand of the concat_vector.
 16379 if (InVec.getOpcode() != ISD::CONCAT_VECTORS)
 16380 return SDValue();
 16381
 16382 auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
 16383 if (!IndexC)
 16384 return SDValue();
 16385 unsigned Elt = IndexC->getZExtValue();
 16386
 16387 EVT ConcatVT = InVec.getOperand(0).getValueType();
 16388 if (ConcatVT.getVectorElementType() != InVal.getValueType())
 16389 return SDValue();
 16390 unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
 // Index within the concat operand that actually contains lane Elt.
 16391 SDValue NewIdx = DAG.getVectorIdxConstant(Elt % ConcatNumElts, DL);
 16392
 16393 unsigned ConcatOpIdx = Elt / ConcatNumElts;
 16394 SDValue ConcatOp = InVec.getOperand(ConcatOpIdx);
 16395 ConcatOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ConcatVT,
 16396 ConcatOp, InVal, NewIdx);
 16397
 // Rebuild the concat with only the touched operand replaced.
 16398 SmallVector<SDValue> ConcatOps;
 16399 ConcatOps.append(InVec->op_begin(), InVec->op_end());
 16400 ConcatOps[ConcatOpIdx] = ConcatOp;
 16401 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
 16402 }
16403
 16404 // If we're concatenating a series of vector loads like
 16405 // concat_vectors (load v4i8, p+0), (load v4i8, p+n), (load v4i8, p+n*2) ...
 16406 // Then we can turn this into a strided load by widening the vector elements
 16407 // vlse32 p, stride=n
 // NOTE(review): doxygen-rendered listing; the extraction dropped lines 16408
 // (presumably the "static SDValue performCONCAT_VECTORSCombine(...)"
 // signature), 16430 (presumably "SmallVector<LoadSDNode *> Lds;" — Lds is
 // used below without a visible definition), and 16517/16519 (presumably the
 // unknown-size MemSize assignment and the getMachineMemOperand call that the
 // dangling argument lines below belong to). Confirm against upstream.
 16409 const RISCVSubtarget &Subtarget,
 16410 const RISCVTargetLowering &TLI) {
 16411 SDLoc DL(N);
 16412 EVT VT = N->getValueType(0);
 16413
 16414 // Only perform this combine on legal MVTs.
 16415 if (!TLI.isTypeLegal(VT))
 16416 return SDValue();
 16417
 16418 // TODO: Potentially extend this to scalable vectors
 16419 if (VT.isScalableVector())
 16420 return SDValue();
 16421
 16422 auto *BaseLd = dyn_cast<LoadSDNode>(N->getOperand(0));
 16423 if (!BaseLd || !BaseLd->isSimple() || !ISD::isNormalLoad(BaseLd) ||
 16424 !SDValue(BaseLd, 0).hasOneUse())
 16425 return SDValue();
 16426
 16427 EVT BaseLdVT = BaseLd->getValueType(0);
 16428
 16429 // Go through the loads and check that they're strided
 16431 Lds.push_back(BaseLd);
 16432 Align Align = BaseLd->getAlign();
 // Every operand must be a simple, normal load of the same type on the same
 // chain as the base load.
 16433 for (SDValue Op : N->ops().drop_front()) {
 16434 auto *Ld = dyn_cast<LoadSDNode>(Op);
 16435 if (!Ld || !Ld->isSimple() || !Op.hasOneUse() ||
 16436 Ld->getChain() != BaseLd->getChain() || !ISD::isNormalLoad(Ld) ||
 16437 Ld->getValueType(0) != BaseLdVT)
 16438 return SDValue();
 16439
 16440 Lds.push_back(Ld);
 16441
 16442 // The common alignment is the most restrictive (smallest) of all the loads
 16443 Align = std::min(Align, Ld->getAlign());
 16444 }
 16445
 // PtrDiff: either a constant byte distance or an SDValue stride, plus a flag
 // saying the stride must be negated (pointers were matched in reverse order).
 16446 using PtrDiff = std::pair<std::variant<int64_t, SDValue>, bool>;
 16447 auto GetPtrDiff = [&DAG](LoadSDNode *Ld1,
 16448 LoadSDNode *Ld2) -> std::optional<PtrDiff> {
 16449 // If the load ptrs can be decomposed into a common (Base + Index) with a
 16450 // common constant stride, then return the constant stride.
 16451 BaseIndexOffset BIO1 = BaseIndexOffset::match(Ld1, DAG);
 16452 BaseIndexOffset BIO2 = BaseIndexOffset::match(Ld2, DAG);
 16453 if (BIO1.equalBaseIndex(BIO2, DAG))
 16454 return {{BIO2.getOffset() - BIO1.getOffset(), false}};
 16455
 16456 // Otherwise try to match (add LastPtr, Stride) or (add NextPtr, Stride)
 16457 SDValue P1 = Ld1->getBasePtr();
 16458 SDValue P2 = Ld2->getBasePtr();
 16459 if (P2.getOpcode() == ISD::ADD && P2.getOperand(0) == P1)
 16460 return {{P2.getOperand(1), false}};
 16461 if (P1.getOpcode() == ISD::ADD && P1.getOperand(0) == P2)
 16462 return {{P1.getOperand(1), true}};
 16463
 16464 return std::nullopt;
 16465 };
 16466
 16467 // Get the distance between the first and second loads
 16468 auto BaseDiff = GetPtrDiff(Lds[0], Lds[1]);
 16469 if (!BaseDiff)
 16470 return SDValue();
 16471
 16472 // Check all the loads are the same distance apart
 16473 for (auto *It = Lds.begin() + 1; It != Lds.end() - 1; It++)
 16474 if (GetPtrDiff(*It, *std::next(It)) != BaseDiff)
 16475 return SDValue();
 16476
 16477 // TODO: At this point, we've successfully matched a generalized gather
 16478 // load. Maybe we should emit that, and then move the specialized
 16479 // matchers above and below into a DAG combine?
 16480
 16481 // Get the widened scalar type, e.g. v4i8 -> i64
 16482 unsigned WideScalarBitWidth =
 16483 BaseLdVT.getScalarSizeInBits() * BaseLdVT.getVectorNumElements();
 16484 MVT WideScalarVT = MVT::getIntegerVT(WideScalarBitWidth);
 16485
 16486 // Get the vector type for the strided load, e.g. 4 x v4i8 -> v4i64
 16487 MVT WideVecVT = MVT::getVectorVT(WideScalarVT, N->getNumOperands());
 16488 if (!TLI.isTypeLegal(WideVecVT))
 16489 return SDValue();
 16490
 16491 // Check that the operation is legal
 16492 if (!TLI.isLegalStridedLoadStore(WideVecVT, Align))
 16493 return SDValue();
 16494
 16495 auto [StrideVariant, MustNegateStride] = *BaseDiff;
 16496 SDValue Stride =
 16497 std::holds_alternative<SDValue>(StrideVariant)
 16498 ? std::get<SDValue>(StrideVariant)
 16499 : DAG.getSignedConstant(std::get<int64_t>(StrideVariant), DL,
 16500 Lds[0]->getOffset().getValueType());
 16501 if (MustNegateStride)
 16502 Stride = DAG.getNegative(Stride, DL, Stride.getValueType());
 16503
 16504 SDValue AllOneMask =
 16505 DAG.getSplat(WideVecVT.changeVectorElementType(MVT::i1), DL,
 16506 DAG.getConstant(1, DL, MVT::i1));
 16507
 16508 uint64_t MemSize;
 16509 if (auto *ConstStride = dyn_cast<ConstantSDNode>(Stride);
 16510 ConstStride && ConstStride->getSExtValue() >= 0)
 16511 // total size = (elsize * n) + (stride - elsize) * (n-1)
 16512 // = elsize + stride * (n-1)
 16513 MemSize = WideScalarVT.getSizeInBits() +
 16514 ConstStride->getSExtValue() * (N->getNumOperands() - 1);
 16515 else
 16516 // If Stride isn't constant, then we can't know how much it will load
 16518
 16520 BaseLd->getPointerInfo(), BaseLd->getMemOperand()->getFlags(), MemSize,
 16521 Align);
 16522
 16523 SDValue StridedLoad = DAG.getStridedLoadVP(
 16524 WideVecVT, DL, BaseLd->getChain(), BaseLd->getBasePtr(), Stride,
 16525 AllOneMask,
 16526 DAG.getConstant(N->getNumOperands(), DL, Subtarget.getXLenVT()), MMO);
 16527
 // Keep memory ordering: anything ordered after the original loads must now
 // be ordered after the strided load.
 16528 for (SDValue Ld : N->ops())
 16529 DAG.makeEquivalentMemoryOrdering(cast<LoadSDNode>(Ld), StridedLoad);
 16530
 16531 return DAG.getBitcast(VT.getSimpleVT(), StridedLoad);
 16532 }
16533
// combineToVWMACC: fold (add x, (vwmul{,u,su} y, z)) into the single widening
// multiply-accumulate node vwmacc{,u,su} y, z, x. The root may be either a
// generic ISD::ADD on a scalable vector or a RISCVISD::ADD_VL node; the
// mask/VL operands of the add and of the multiply must match for the fold to
// be valid.
16535 const RISCVSubtarget &Subtarget) {
16536
16537 assert(N->getOpcode() == RISCVISD::ADD_VL || N->getOpcode() == ISD::ADD);
16538
// Only scalable vectors are handled here; fixed-length vectors bail out.
16539 if (N->getValueType(0).isFixedLengthVector())
16540 return SDValue();
16541
16542 SDValue Addend = N->getOperand(0);
16543 SDValue MulOp = N->getOperand(1);
16544
// An ADD_VL with a non-undef passthru cannot be folded, since the VWMACC
// result would clobber lanes the passthru is supposed to preserve.
16545 if (N->getOpcode() == RISCVISD::ADD_VL) {
16546 SDValue AddPassthruOp = N->getOperand(2);
16547 if (!AddPassthruOp.isUndef())
16548 return SDValue();
16549 }
16550
// Recognize the widening-multiply opcodes this combine can absorb.
16551 auto IsVWMulOpc = [](unsigned Opc) {
16552 switch (Opc) {
16553 case RISCVISD::VWMUL_VL:
16556 return true;
16557 default:
16558 return false;
16559 }
16560 };
16561
// ADD is commutative: if operand 1 is not the multiply, try operand 0.
16562 if (!IsVWMulOpc(MulOp.getOpcode()))
16563 std::swap(Addend, MulOp);
16564
16565 if (!IsVWMulOpc(MulOp.getOpcode()))
16566 return SDValue();
16567
// The multiply's passthru must also be undef, for the same reason as above.
16568 SDValue MulPassthruOp = MulOp.getOperand(2);
16569
16570 if (!MulPassthruOp.isUndef())
16571 return SDValue();
16572
// For a plain ISD::ADD there are no explicit mask/VL operands, so use the
// default all-ones mask and VLMAX for the value type; for ADD_VL take them
// directly from the node.
16573 auto [AddMask, AddVL] = [](SDNode *N, SelectionDAG &DAG,
16574 const RISCVSubtarget &Subtarget) {
16575 if (N->getOpcode() == ISD::ADD) {
16576 SDLoc DL(N);
16577 return getDefaultScalableVLOps(N->getSimpleValueType(0), DL, DAG,
16578 Subtarget);
16579 }
16580 return std::make_pair(N->getOperand(3), N->getOperand(4));
16581 }(N, DAG, Subtarget);
16582
16583 SDValue MulMask = MulOp.getOperand(3);
16584 SDValue MulVL = MulOp.getOperand(4);
16585
// Both nodes must agree on which lanes are active.
16586 if (AddMask != MulMask || AddVL != MulVL)
16587 return SDValue();
16588
// Map VWMUL{,U,SU}_VL to VWMACC{,U,SU}_VL by opcode arithmetic; the
// static_asserts pin the enum layout this relies on.
16589 unsigned Opc = RISCVISD::VWMACC_VL + MulOp.getOpcode() - RISCVISD::VWMUL_VL;
16590 static_assert(RISCVISD::VWMACC_VL + 1 == RISCVISD::VWMACCU_VL,
16591 "Unexpected opcode after VWMACC_VL");
16592 static_assert(RISCVISD::VWMACC_VL + 2 == RISCVISD::VWMACCSU_VL,
16593 "Unexpected opcode after VWMACC_VL!");
16594 static_assert(RISCVISD::VWMUL_VL + 1 == RISCVISD::VWMULU_VL,
16595 "Unexpected opcode after VWMUL_VL!");
16596 static_assert(RISCVISD::VWMUL_VL + 2 == RISCVISD::VWMULSU_VL,
16597 "Unexpected opcode after VWMUL_VL!");
16598
16599 SDLoc DL(N);
16600 EVT VT = N->getValueType(0);
16601 SDValue Ops[] = {MulOp.getOperand(0), MulOp.getOperand(1), Addend, AddMask,
16602 AddVL};
16603 return DAG.getNode(Opc, DL, VT, Ops);
16604}
16605
// legalizeScatterGatherIndexType: rewrite a signed gather/scatter index so it
// can use the only addressing mode RISC-V supports. Returns true (and updates
// Index/IndexType in place) if a change was made.
16607 ISD::MemIndexType &IndexType,
// Only act before legalization, while it is still cheap to change the index
// vector's type.
16609 if (!DCI.isBeforeLegalize())
16610 return false;
16611
16612 SelectionDAG &DAG = DCI.DAG;
16613 const MVT XLenVT =
16614 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>().getXLenVT();
16615
16616 const EVT IndexVT = Index.getValueType();
16617
16618 // RISC-V indexed loads only support the "unsigned unscaled" addressing
16619 // mode, so anything else must be manually legalized.
16620 if (!isIndexTypeSigned(IndexType))
16621 return false;
16622
16623 if (IndexVT.getVectorElementType().bitsLT(XLenVT)) {
16624 // Any index legalization should first promote to XLenVT, so we don't lose
16625 // bits when scaling. This may create an illegal index type so we let
16626 // LLVM's legalization take care of the splitting.
16627 // FIXME: LLVM can't split VP_GATHER or VP_SCATTER yet.
16629 IndexVT.changeVectorElementType(XLenVT), Index);
16630 }
// Once the index is XLenVT wide, signed and unsigned interpretations agree
// (presumably — the widening above sign-extends), so reclassify as unsigned.
16631 IndexType = ISD::UNSIGNED_SCALED;
16632 return true;
16633}
16634
16635/// Match the index vector of a scatter or gather node as the shuffle mask
16636/// which performs the rearrangement if possible. Will only match if
16637/// all lanes are touched, and thus replacing the scatter or gather with
16638/// a unit strided access and shuffle is legal.
// Fills ShuffleMask with one entry per index operand; returns true only when
// every lane of the result is produced exactly once.
16640 SmallVector<int> &ShuffleMask) {
// The gather/scatter mask must be all-ones: a masked-off lane would make the
// unit-strided load/store touch memory the original operation would not.
16641 if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
16642 return false;
16644 return false;
16645
16646 const unsigned ElementSize = VT.getScalarStoreSize();
16647 const unsigned NumElems = VT.getVectorNumElements();
16648
16649 // Create the shuffle mask and check all bits active
16650 assert(ShuffleMask.empty());
16651 BitVector ActiveLanes(NumElems);
16652 for (unsigned i = 0; i < Index->getNumOperands(); i++) {
16653 // TODO: We've found an active bit of UB, and could be
16654 // more aggressive here if desired.
16655 if (Index->getOperand(i)->isUndef())
16656 return false;
// Each index entry is a byte offset; it must land exactly on an element
// boundary and within the vector to be expressible as a shuffle lane.
16657 uint64_t C = Index->getConstantOperandVal(i);
16658 if (C % ElementSize != 0)
16659 return false;
16660 C = C / ElementSize;
16661 if (C >= NumElems)
16662 return false;
16663 ShuffleMask.push_back(C);
16664 ActiveLanes.set(C);
16665 }
// Only legal if every lane was referenced (no holes the gather would skip).
16666 return ActiveLanes.all();
16667}
16668
16669/// Match the index of a gather or scatter operation as an operation
16670/// with twice the element width and half the number of elements. This is
16671/// generally profitable (if legal) because these operations are linear
16672 /// in VL, so even if we cause some extra VTYPE/VL toggles, we still
16673/// come out ahead.
// Returns true when adjacent pairs of index entries address contiguous
// memory, so each pair can be treated as one element of twice the width.
16675 Align BaseAlign, const RISCVSubtarget &ST) {
// Require an all-ones mask: widening merges lanes, so per-lane masking of the
// original operation could not be represented.
16676 if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
16677 return false;
16679 return false;
16680
16681 // Attempt a doubling. If we can use an element type 4x or 8x in
16682 // size, this will happen via multiple iterations of the transform.
16683 const unsigned NumElems = VT.getVectorNumElements();
16684 if (NumElems % 2 != 0)
16685 return false;
16686
// The doubled element must still fit in the vector unit's ELEN.
16687 const unsigned ElementSize = VT.getScalarStoreSize();
16688 const unsigned WiderElementSize = ElementSize * 2;
16689 if (WiderElementSize > ST.getELen()/8)
16690 return false;
16691
// Without unaligned vector memory support, the base must be aligned for the
// wider element accesses this transform creates.
16692 if (!ST.enableUnalignedVectorMem() && BaseAlign < WiderElementSize)
16693 return false;
16694
16695 for (unsigned i = 0; i < Index->getNumOperands(); i++) {
16696 // TODO: We've found an active bit of UB, and could be
16697 // more aggressive here if desired.
16698 if (Index->getOperand(i)->isUndef())
16699 return false;
16700 // TODO: This offset check is too strict if we support fully
16701 // misaligned memory operations.
16702 uint64_t C = Index->getConstantOperandVal(i);
// Even positions start a pair and must sit on a wider-element boundary;
// odd positions must be exactly contiguous with their predecessor so the
// pair forms one wider element.
16703 if (i % 2 == 0) {
16704 if (C % WiderElementSize != 0)
16705 return false;
16706 continue;
16707 }
16708 uint64_t Last = Index->getConstantOperandVal(i-1);
16709 if (C != Last + ElementSize)
16710 return false;
16711 }
16712 return true;
16713}
16714
16715// trunc (sra sext (X), zext (Y)) -> sra (X, smin (Y, scalarsize(Y) - 1))
16716 // This is beneficial for cases where X and Y are both the same value
16717 // type of low-precision vectors. Since the truncate would be lowered into
16718// n-levels TRUNCATE_VECTOR_VL to satisfy RVV's SEW*2->SEW truncate
16719// restriction, such pattern would be expanded into a series of "vsetvli"
16720// and "vnsrl" instructions later to reach this point.
// Matches a chain of VLMAX, all-ones-masked TRUNCATE_VECTOR_VL nodes over
// (sra (sext X), (zext Y)) and rewrites it as a narrow sra with the shift
// amount clamped via smin.
16722 SDValue Mask = N->getOperand(1);
16723 SDValue VL = N->getOperand(2);
16724
// VL must be VLMAX: either the all-ones sentinel or the X0 register form.
16725 bool IsVLMAX = isAllOnesConstant(VL) ||
16726 (isa<RegisterSDNode>(VL) &&
16727 cast<RegisterSDNode>(VL)->getReg() == RISCV::X0);
// The mask must be a VMSET over the same VL, i.e. every lane is active.
16728 if (!IsVLMAX || Mask.getOpcode() != RISCVISD::VMSET_VL ||
16729 Mask.getOperand(0) != VL)
16730 return SDValue();
16731
// A truncate layer is only peelable if it carries the same mask/VL.
16732 auto IsTruncNode = [&](SDValue V) {
16733 return V.getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL &&
16734 V.getOperand(1) == Mask && V.getOperand(2) == VL;
16735 };
16736
16737 SDValue Op = N->getOperand(0);
16738
16739 // We need to first find the inner level of TRUNCATE_VECTOR_VL node
16740 // to distinguish such pattern.
16741 while (IsTruncNode(Op)) {
// Each intermediate truncate must be single-use, or we would duplicate work.
16742 if (!Op.hasOneUse())
16743 return SDValue();
16744 Op = Op.getOperand(0);
16745 }
16746
16747 if (Op.getOpcode() != ISD::SRA || !Op.hasOneUse())
16748 return SDValue();
16749
// The shifted value must be sign-extended and the shift amount zero-extended,
// both single-use, matching the pattern in the header comment.
16750 SDValue N0 = Op.getOperand(0);
16751 SDValue N1 = Op.getOperand(1);
16752 if (N0.getOpcode() != ISD::SIGN_EXTEND || !N0.hasOneUse() ||
16753 N1.getOpcode() != ISD::ZERO_EXTEND || !N1.hasOneUse())
16754 return SDValue();
16755
// X and Y must share the narrow vector type, which must also be the final
// truncated result type of N.
16756 SDValue N00 = N0.getOperand(0);
16757 SDValue N10 = N1.getOperand(0);
16758 if (!N00.getValueType().isVector() ||
16759 N00.getValueType() != N10.getValueType() ||
16760 N->getValueType(0) != N10.getValueType())
16761 return SDValue();
16762
// Clamp the shift amount to scalarsize-1 so the narrow sra behaves like the
// wide one for any shift Y.
16763 unsigned MaxShAmt = N10.getValueType().getScalarSizeInBits() - 1;
16764 SDValue SMin =
16765 DAG.getNode(ISD::SMIN, SDLoc(N1), N->getValueType(0), N10,
16766 DAG.getConstant(MaxShAmt, SDLoc(N1), N->getValueType(0)));
16767 return DAG.getNode(ISD::SRA, SDLoc(N), N->getValueType(0), N00, SMin);
16768}
16769
16770// Combine (truncate_vector_vl (umin X, C)) -> (vnclipu_vl X) if C is the
16771// maximum value for the truncated type.
16772// Combine (truncate_vector_vl (smin (smax X, C2), C1)) -> (vnclip_vl X) if C1
16773// is the signed maximum value for the truncated type and C2 is the signed
16774// minimum value.
// Detects saturating min/max clamp patterns feeding a TRUNCATE_VECTOR_VL and
// replaces the truncate chain with vnclip/vnclipu nodes.
16776 const RISCVSubtarget &Subtarget) {
16777 assert(N->getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL);
16778
16779 MVT VT = N->getSimpleValueType(0);
16780
16781 SDValue Mask = N->getOperand(1);
16782 SDValue VL = N->getOperand(2);
16783
// Match V as either the generic min/max opcode Opc or its VL form OpcVL
// (undef passthru, same mask/VL as the truncate). On success, SplatVal is set
// to the splatted constant operand and the non-constant operand is returned.
16784 auto MatchMinMax = [&VL, &Mask](SDValue V, unsigned Opc, unsigned OpcVL,
16785 APInt &SplatVal) {
16786 if (V.getOpcode() != Opc &&
16787 !(V.getOpcode() == OpcVL && V.getOperand(2).isUndef() &&
16788 V.getOperand(3) == Mask && V.getOperand(4) == VL))
16789 return SDValue();
16790
16791 SDValue Op = V.getOperand(1);
16792
16793 // Peek through conversion between fixed and scalable vectors.
16794 if (Op.getOpcode() == ISD::INSERT_SUBVECTOR && Op.getOperand(0).isUndef() &&
16795 isNullConstant(Op.getOperand(2)) &&
16796 Op.getOperand(1).getValueType().isFixedLengthVector() &&
16797 Op.getOperand(1).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
16798 Op.getOperand(1).getOperand(0).getValueType() == Op.getValueType() &&
16799 isNullConstant(Op.getOperand(1).getOperand(1)))
16800 Op = Op.getOperand(1).getOperand(0);
16801
// Constant splat in plain BUILD_VECTOR/SPLAT_VECTOR form.
16802 if (ISD::isConstantSplatVector(Op.getNode(), SplatVal))
16803 return V.getOperand(0);
16804
// Constant splat in VL form (VMV_V_X_VL with undef passthru and same VL);
// sextOrTrunc normalizes the scalar to the vector's element width.
16805 if (Op.getOpcode() == RISCVISD::VMV_V_X_VL && Op.getOperand(0).isUndef() &&
16806 Op.getOperand(2) == VL) {
16807 if (auto *Op1 = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
16808 SplatVal =
16809 Op1->getAPIntValue().sextOrTrunc(Op.getScalarValueSizeInBits());
16810 return V.getOperand(0);
16811 }
16812 }
16813
16814 return SDValue();
16815 };
16816
16817 SDLoc DL(N);
16818
// Unsigned saturation: a plain UMIN with the unsigned-max constant, or an
// SMIN/SMAX pair whose constants prove the value is in [0, unsigned-max].
16819 auto DetectUSatPattern = [&](SDValue V) {
16820 APInt LoC, HiC;
16821
16822 // Simple case, V is a UMIN.
16823 if (SDValue UMinOp = MatchMinMax(V, ISD::UMIN, RISCVISD::UMIN_VL, HiC))
16824 if (HiC.isMask(VT.getScalarSizeInBits()))
16825 return UMinOp;
16826
16827 // If we have an SMAX that removes negative numbers first, then we can match
16828 // SMIN instead of UMIN.
16829 if (SDValue SMinOp = MatchMinMax(V, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
16830 if (SDValue SMaxOp =
16831 MatchMinMax(SMinOp, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
16832 if (LoC.isNonNegative() && HiC.isMask(VT.getScalarSizeInBits()))
16833 return SMinOp;
16834
16835 // If we have an SMIN before an SMAX and the SMAX constant is less than or
16836 // equal to the SMIN constant, we can use vnclipu if we insert a new SMAX
16837 // first.
16838 if (SDValue SMaxOp = MatchMinMax(V, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
16839 if (SDValue SMinOp =
16840 MatchMinMax(SMaxOp, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
16841 if (LoC.isNonNegative() && HiC.isMask(VT.getScalarSizeInBits()) &&
16842 HiC.uge(LoC))
16843 return DAG.getNode(RISCVISD::SMAX_VL, DL, V.getValueType(), SMinOp,
16844 V.getOperand(1), DAG.getUNDEF(V.getValueType()),
16845 Mask, VL);
16846
16847 return SDValue();
16848 };
16849
// Signed saturation: SMIN/SMAX (in either order) clamping exactly to the
// signed range of the destination element type.
16850 auto DetectSSatPattern = [&](SDValue V) {
16851 unsigned NumDstBits = VT.getScalarSizeInBits();
16852 unsigned NumSrcBits = V.getScalarValueSizeInBits();
16853 APInt SignedMax = APInt::getSignedMaxValue(NumDstBits).sext(NumSrcBits);
16854 APInt SignedMin = APInt::getSignedMinValue(NumDstBits).sext(NumSrcBits);
16855
16856 APInt HiC, LoC;
16857 if (SDValue SMinOp = MatchMinMax(V, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
16858 if (SDValue SMaxOp =
16859 MatchMinMax(SMinOp, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
16860 if (HiC == SignedMax && LoC == SignedMin)
16861 return SMaxOp;
16862
16863 if (SDValue SMaxOp = MatchMinMax(V, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
16864 if (SDValue SMinOp =
16865 MatchMinMax(SMaxOp, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
16866 if (HiC == SignedMax && LoC == SignedMin)
16867 return SMinOp;
16868
16869 return SDValue();
16870 };
16871
16872 SDValue Src = N->getOperand(0);
16873
16874 // Look through multiple layers of truncates.
16875 while (Src.getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL &&
16876 Src.getOperand(1) == Mask && Src.getOperand(2) == VL &&
16877 Src.hasOneUse())
16878 Src = Src.getOperand(0);
16879
// Pick the clip opcode matching the detected saturation kind.
16880 SDValue Val;
16881 unsigned ClipOpc;
16882 if ((Val = DetectUSatPattern(Src)))
16884 else if ((Val = DetectSSatPattern(Src)))
16886 else
16887 return SDValue();
16888
16889 MVT ValVT = Val.getSimpleValueType();
16890
// Emit one clip per halving step, since each clip narrows the element width
// by exactly a factor of two, until the destination type is reached.
16891 do {
16892 MVT ValEltVT = MVT::getIntegerVT(ValVT.getScalarSizeInBits() / 2);
16893 ValVT = ValVT.changeVectorElementType(ValEltVT);
16894 Val = DAG.getNode(ClipOpc, DL, ValVT, Val, Mask, VL);
16895 } while (ValVT != VT);
16896
16897 return Val;
16898}
16899
16901 DAGCombinerInfo &DCI) const {
16902 SelectionDAG &DAG = DCI.DAG;
16903 const MVT XLenVT = Subtarget.getXLenVT();
16904 SDLoc DL(N);
16905
16906 // Helper to call SimplifyDemandedBits on an operand of N where only some low
16907 // bits are demanded. N will be added to the Worklist if it was not deleted.
16908 // Caller should return SDValue(N, 0) if this returns true.
16909 auto SimplifyDemandedLowBitsHelper = [&](unsigned OpNo, unsigned LowBits) {
16910 SDValue Op = N->getOperand(OpNo);
16911 APInt Mask = APInt::getLowBitsSet(Op.getValueSizeInBits(), LowBits);
16912 if (!SimplifyDemandedBits(Op, Mask, DCI))
16913 return false;
16914
16915 if (N->getOpcode() != ISD::DELETED_NODE)
16916 DCI.AddToWorklist(N);
16917 return true;
16918 };
16919
16920 switch (N->getOpcode()) {
16921 default:
16922 break;
16923 case RISCVISD::SplitF64: {
16924 SDValue Op0 = N->getOperand(0);
16925 // If the input to SplitF64 is just BuildPairF64 then the operation is
16926 // redundant. Instead, use BuildPairF64's operands directly.
16927 if (Op0->getOpcode() == RISCVISD::BuildPairF64)
16928 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
16929
16930 if (Op0->isUndef()) {
16931 SDValue Lo = DAG.getUNDEF(MVT::i32);
16932 SDValue Hi = DAG.getUNDEF(MVT::i32);
16933 return DCI.CombineTo(N, Lo, Hi);
16934 }
16935
16936 // It's cheaper to materialise two 32-bit integers than to load a double
16937 // from the constant pool and transfer it to integer registers through the
16938 // stack.
16940 APInt V = C->getValueAPF().bitcastToAPInt();
16941 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
16942 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
16943 return DCI.CombineTo(N, Lo, Hi);
16944 }
16945
16946 // This is a target-specific version of a DAGCombine performed in
16947 // DAGCombiner::visitBITCAST. It performs the equivalent of:
16948 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
16949 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
16950 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
16951 !Op0.getNode()->hasOneUse())
16952 break;
16953 SDValue NewSplitF64 =
16954 DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32),
16955 Op0.getOperand(0));
16956 SDValue Lo = NewSplitF64.getValue(0);
16957 SDValue Hi = NewSplitF64.getValue(1);
16958 APInt SignBit = APInt::getSignMask(32);
16959 if (Op0.getOpcode() == ISD::FNEG) {
16960 SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi,
16961 DAG.getConstant(SignBit, DL, MVT::i32));
16962 return DCI.CombineTo(N, Lo, NewHi);
16963 }
16964 assert(Op0.getOpcode() == ISD::FABS);
16965 SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi,
16966 DAG.getConstant(~SignBit, DL, MVT::i32));
16967 return DCI.CombineTo(N, Lo, NewHi);
16968 }
16969 case RISCVISD::SLLW:
16970 case RISCVISD::SRAW:
16971 case RISCVISD::SRLW:
16972 case RISCVISD::RORW:
16973 case RISCVISD::ROLW: {
16974 // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
16975 if (SimplifyDemandedLowBitsHelper(0, 32) ||
16976 SimplifyDemandedLowBitsHelper(1, 5))
16977 return SDValue(N, 0);
16978
16979 break;
16980 }
16981 case RISCVISD::CLZW:
16982 case RISCVISD::CTZW: {
16983 // Only the lower 32 bits of the first operand are read
16984 if (SimplifyDemandedLowBitsHelper(0, 32))
16985 return SDValue(N, 0);
16986 break;
16987 }
16989 // If the input to FMV_W_X_RV64 is just FMV_X_ANYEXTW_RV64 then the
16990 // conversion is unnecessary and can be replaced with the
16991 // FMV_X_ANYEXTW_RV64 operand.
16992 SDValue Op0 = N->getOperand(0);
16994 return Op0.getOperand(0);
16995 break;
16996 }
16999 SDLoc DL(N);
17000 SDValue Op0 = N->getOperand(0);
17001 MVT VT = N->getSimpleValueType(0);
17002
17003 // Constant fold.
17004 if (auto *CFP = dyn_cast<ConstantFPSDNode>(Op0)) {
17005 APInt Val = CFP->getValueAPF().bitcastToAPInt().sext(VT.getSizeInBits());
17006 return DAG.getConstant(Val, DL, VT);
17007 }
17008
17009 // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
17010 // conversion is unnecessary and can be replaced with the FMV_W_X_RV64
17011 // operand. Similar for FMV_X_ANYEXTH and FMV_H_X.
17012 if ((N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 &&
17013 Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) ||
17014 (N->getOpcode() == RISCVISD::FMV_X_ANYEXTH &&
17015 Op0->getOpcode() == RISCVISD::FMV_H_X)) {
17016 assert(Op0.getOperand(0).getValueType() == VT &&
17017 "Unexpected value type!");
17018 return Op0.getOperand(0);
17019 }
17020
17021 if (ISD::isNormalLoad(Op0.getNode()) && Op0.hasOneUse() &&
17022 cast<LoadSDNode>(Op0)->isSimple()) {
17024 auto *LN0 = cast<LoadSDNode>(Op0);
17025 SDValue Load =
17026 DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT, LN0->getChain(),
17027 LN0->getBasePtr(), IVT, LN0->getMemOperand());
17028 DAG.ReplaceAllUsesOfValueWith(Op0.getValue(1), Load.getValue(1));
17029 return Load;
17030 }
17031
17032 // This is a target-specific version of a DAGCombine performed in
17033 // DAGCombiner::visitBITCAST. It performs the equivalent of:
17034 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
17035 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
17036 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
17037 !Op0.getNode()->hasOneUse())
17038 break;
17039 SDValue NewFMV = DAG.getNode(N->getOpcode(), DL, VT, Op0.getOperand(0));
17040 unsigned FPBits = N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 ? 32 : 16;
17041 APInt SignBit = APInt::getSignMask(FPBits).sext(VT.getSizeInBits());
17042 if (Op0.getOpcode() == ISD::FNEG)
17043 return DAG.getNode(ISD::XOR, DL, VT, NewFMV,
17044 DAG.getConstant(SignBit, DL, VT));
17045
17046 assert(Op0.getOpcode() == ISD::FABS);
17047 return DAG.getNode(ISD::AND, DL, VT, NewFMV,
17048 DAG.getConstant(~SignBit, DL, VT));
17049 }
17050 case ISD::ABS: {
17051 EVT VT = N->getValueType(0);
17052 SDValue N0 = N->getOperand(0);
17053 // abs (sext) -> zext (abs)
17054 // abs (zext) -> zext (handled elsewhere)
17055 if (VT.isVector() && N0.hasOneUse() && N0.getOpcode() == ISD::SIGN_EXTEND) {
17056 SDValue Src = N0.getOperand(0);
17057 SDLoc DL(N);
17058 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT,
17059 DAG.getNode(ISD::ABS, DL, Src.getValueType(), Src));
17060 }
17061 break;
17062 }
17063 case ISD::ADD: {
17064 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
17065 return V;
17066 if (SDValue V = combineToVWMACC(N, DAG, Subtarget))
17067 return V;
17068 return performADDCombine(N, DCI, Subtarget);
17069 }
17070 case ISD::SUB: {
17071 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
17072 return V;
17073 return performSUBCombine(N, DAG, Subtarget);
17074 }
17075 case ISD::AND:
17076 return performANDCombine(N, DCI, Subtarget);
17077 case ISD::OR: {
17078 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
17079 return V;
17080 return performORCombine(N, DCI, Subtarget);
17081 }
17082 case ISD::XOR:
17083 return performXORCombine(N, DAG, Subtarget);
17084 case ISD::MUL:
17085 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
17086 return V;
17087 return performMULCombine(N, DAG, DCI, Subtarget);
17088 case ISD::SDIV:
17089 case ISD::UDIV:
17090 case ISD::SREM:
17091 case ISD::UREM:
17092 if (SDValue V = combineBinOpOfZExt(N, DAG))
17093 return V;
17094 break;
17095 case ISD::FMUL: {
17096 // fmul X, (copysign 1.0, Y) -> fsgnjx X, Y
17097 SDValue N0 = N->getOperand(0);
17098 SDValue N1 = N->getOperand(1);
17099 if (N0->getOpcode() != ISD::FCOPYSIGN)
17100 std::swap(N0, N1);
17101 if (N0->getOpcode() != ISD::FCOPYSIGN)
17102 return SDValue();
17104 if (!C || !C->getValueAPF().isExactlyValue(+1.0))
17105 return SDValue();
17106 EVT VT = N->getValueType(0);
17107 if (VT.isVector() || !isOperationLegal(ISD::FCOPYSIGN, VT))
17108 return SDValue();
17109 SDValue Sign = N0->getOperand(1);
17110 if (Sign.getValueType() != VT)
17111 return SDValue();
17112 return DAG.getNode(RISCVISD::FSGNJX, SDLoc(N), VT, N1, N0->getOperand(1));
17113 }
17114 case ISD::FADD:
17115 case ISD::UMAX:
17116 case ISD::UMIN:
17117 case ISD::SMAX:
17118 case ISD::SMIN:
17119 case ISD::FMAXNUM:
17120 case ISD::FMINNUM: {
17121 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
17122 return V;
17123 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
17124 return V;
17125 return SDValue();
17126 }
17127 case ISD::SETCC:
17128 return performSETCCCombine(N, DAG, Subtarget);
17130 return performSIGN_EXTEND_INREGCombine(N, DAG, Subtarget);
17131 case ISD::ZERO_EXTEND:
17132 // Fold (zero_extend (fp_to_uint X)) to prevent forming fcvt+zexti32 during
17133 // type legalization. This is safe because fp_to_uint produces poison if
17134 // it overflows.
17135 if (N->getValueType(0) == MVT::i64 && Subtarget.is64Bit()) {
17136 SDValue Src = N->getOperand(0);
17137 if (Src.getOpcode() == ISD::FP_TO_UINT &&
17138 isTypeLegal(Src.getOperand(0).getValueType()))
17139 return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), MVT::i64,
17140 Src.getOperand(0));
17141 if (Src.getOpcode() == ISD::STRICT_FP_TO_UINT && Src.hasOneUse() &&
17142 isTypeLegal(Src.getOperand(1).getValueType())) {
17143 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
17144 SDValue Res = DAG.getNode(ISD::STRICT_FP_TO_UINT, SDLoc(N), VTs,
17145 Src.getOperand(0), Src.getOperand(1));
17146 DCI.CombineTo(N, Res);
17147 DAG.ReplaceAllUsesOfValueWith(Src.getValue(1), Res.getValue(1));
17148 DCI.recursivelyDeleteUnusedNodes(Src.getNode());
17149 return SDValue(N, 0); // Return N so it doesn't get rechecked.
17150 }
17151 }
17152 return SDValue();
17154 if (SDValue V = combineTruncOfSraSext(N, DAG))
17155 return V;
17156 return combineTruncToVnclip(N, DAG, Subtarget);
17157 case ISD::TRUNCATE:
17158 return performTRUNCATECombine(N, DAG, Subtarget);
17159 case ISD::SELECT:
17160 return performSELECTCombine(N, DAG, Subtarget);
17162 case RISCVISD::CZERO_NEZ: {
17163 SDValue Val = N->getOperand(0);
17164 SDValue Cond = N->getOperand(1);
17165
17166 unsigned Opc = N->getOpcode();
17167
17168 // czero_eqz x, x -> x
17169 if (Opc == RISCVISD::CZERO_EQZ && Val == Cond)
17170 return Val;
17171
17172 unsigned InvOpc =
17174
17175 // czero_eqz X, (xor Y, 1) -> czero_nez X, Y if Y is 0 or 1.
17176 // czero_nez X, (xor Y, 1) -> czero_eqz X, Y if Y is 0 or 1.
17177 if (Cond.getOpcode() == ISD::XOR && isOneConstant(Cond.getOperand(1))) {
17178 SDValue NewCond = Cond.getOperand(0);
17179 APInt Mask = APInt::getBitsSetFrom(NewCond.getValueSizeInBits(), 1);
17180 if (DAG.MaskedValueIsZero(NewCond, Mask))
17181 return DAG.getNode(InvOpc, SDLoc(N), N->getValueType(0), Val, NewCond);
17182 }
17183 // czero_eqz x, (setcc y, 0, ne) -> czero_eqz x, y
17184 // czero_nez x, (setcc y, 0, ne) -> czero_nez x, y
17185 // czero_eqz x, (setcc y, 0, eq) -> czero_nez x, y
17186 // czero_nez x, (setcc y, 0, eq) -> czero_eqz x, y
17187 if (Cond.getOpcode() == ISD::SETCC && isNullConstant(Cond.getOperand(1))) {
17188 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
17189 if (ISD::isIntEqualitySetCC(CCVal))
17190 return DAG.getNode(CCVal == ISD::SETNE ? Opc : InvOpc, SDLoc(N),
17191 N->getValueType(0), Val, Cond.getOperand(0));
17192 }
17193 return SDValue();
17194 }
17195 case RISCVISD::SELECT_CC: {
17196 // Transform
17197 SDValue LHS = N->getOperand(0);
17198 SDValue RHS = N->getOperand(1);
17199 SDValue CC = N->getOperand(2);
17200 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
17201 SDValue TrueV = N->getOperand(3);
17202 SDValue FalseV = N->getOperand(4);
17203 SDLoc DL(N);
17204 EVT VT = N->getValueType(0);
17205
17206 // If the True and False values are the same, we don't need a select_cc.
17207 if (TrueV == FalseV)
17208 return TrueV;
17209
17210 // (select (x < 0), y, z) -> x >> (XLEN - 1) & (y - z) + z
17211 // (select (x >= 0), y, z) -> x >> (XLEN - 1) & (z - y) + y
17212 if (!Subtarget.hasShortForwardBranchOpt() && isa<ConstantSDNode>(TrueV) &&
17213 isa<ConstantSDNode>(FalseV) && isNullConstant(RHS) &&
17214 (CCVal == ISD::CondCode::SETLT || CCVal == ISD::CondCode::SETGE)) {
17215 if (CCVal == ISD::CondCode::SETGE)
17216 std::swap(TrueV, FalseV);
17217
17218 int64_t TrueSImm = cast<ConstantSDNode>(TrueV)->getSExtValue();
17219 int64_t FalseSImm = cast<ConstantSDNode>(FalseV)->getSExtValue();
17220 // Only handle simm12, if it is not in this range, it can be considered as
17221 // register.
17222 if (isInt<12>(TrueSImm) && isInt<12>(FalseSImm) &&
17223 isInt<12>(TrueSImm - FalseSImm)) {
17224 SDValue SRA =
17225 DAG.getNode(ISD::SRA, DL, VT, LHS,
17226 DAG.getConstant(Subtarget.getXLen() - 1, DL, VT));
17227 SDValue AND =
17228 DAG.getNode(ISD::AND, DL, VT, SRA,
17229 DAG.getSignedConstant(TrueSImm - FalseSImm, DL, VT));
17230 return DAG.getNode(ISD::ADD, DL, VT, AND, FalseV);
17231 }
17232
17233 if (CCVal == ISD::CondCode::SETGE)
17234 std::swap(TrueV, FalseV);
17235 }
17236
17237 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
17238 return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0),
17239 {LHS, RHS, CC, TrueV, FalseV});
17240
17241 if (!Subtarget.hasConditionalMoveFusion()) {
17242 // (select c, -1, y) -> -c | y
17243 if (isAllOnesConstant(TrueV)) {
17244 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
17245 SDValue Neg = DAG.getNegative(C, DL, VT);
17246 return DAG.getNode(ISD::OR, DL, VT, Neg, FalseV);
17247 }
17248 // (select c, y, -1) -> -!c | y
17249 if (isAllOnesConstant(FalseV)) {
17250 SDValue C =
17251 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
17252 SDValue Neg = DAG.getNegative(C, DL, VT);
17253 return DAG.getNode(ISD::OR, DL, VT, Neg, TrueV);
17254 }
17255
17256 // (select c, 0, y) -> -!c & y
17257 if (isNullConstant(TrueV)) {
17258 SDValue C =
17259 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
17260 SDValue Neg = DAG.getNegative(C, DL, VT);
17261 return DAG.getNode(ISD::AND, DL, VT, Neg, FalseV);
17262 }
17263 // (select c, y, 0) -> -c & y
17264 if (isNullConstant(FalseV)) {
17265 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
17266 SDValue Neg = DAG.getNegative(C, DL, VT);
17267 return DAG.getNode(ISD::AND, DL, VT, Neg, TrueV);
17268 }
17269 // (riscvisd::select_cc x, 0, ne, x, 1) -> (add x, (setcc x, 0, eq))
17270 // (riscvisd::select_cc x, 0, eq, 1, x) -> (add x, (setcc x, 0, eq))
17271 if (((isOneConstant(FalseV) && LHS == TrueV &&
17272 CCVal == ISD::CondCode::SETNE) ||
17273 (isOneConstant(TrueV) && LHS == FalseV &&
17274 CCVal == ISD::CondCode::SETEQ)) &&
17275 isNullConstant(RHS)) {
17276 // freeze it to be safe.
17277 LHS = DAG.getFreeze(LHS);
17278 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, ISD::CondCode::SETEQ);
17279 return DAG.getNode(ISD::ADD, DL, VT, LHS, C);
17280 }
17281 }
17282
17283 // If both true/false are an xor with 1, pull through the select.
17284 // This can occur after op legalization if both operands are setccs that
17285 // require an xor to invert.
17286 // FIXME: Generalize to other binary ops with identical operand?
17287 if (TrueV.getOpcode() == ISD::XOR && FalseV.getOpcode() == ISD::XOR &&
17288 TrueV.getOperand(1) == FalseV.getOperand(1) &&
17289 isOneConstant(TrueV.getOperand(1)) &&
17290 TrueV.hasOneUse() && FalseV.hasOneUse()) {
17291 SDValue NewSel = DAG.getNode(RISCVISD::SELECT_CC, DL, VT, LHS, RHS, CC,
17292 TrueV.getOperand(0), FalseV.getOperand(0));
17293 return DAG.getNode(ISD::XOR, DL, VT, NewSel, TrueV.getOperand(1));
17294 }
17295
17296 return SDValue();
17297 }
17298 case RISCVISD::BR_CC: {
17299 SDValue LHS = N->getOperand(1);
17300 SDValue RHS = N->getOperand(2);
17301 SDValue CC = N->getOperand(3);
17302 SDLoc DL(N);
17303
17304 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
17305 return DAG.getNode(RISCVISD::BR_CC, DL, N->getValueType(0),
17306 N->getOperand(0), LHS, RHS, CC, N->getOperand(4));
17307
17308 return SDValue();
17309 }
17310 case ISD::BITREVERSE:
17311 return performBITREVERSECombine(N, DAG, Subtarget);
17312 case ISD::FP_TO_SINT:
17313 case ISD::FP_TO_UINT:
17314 return performFP_TO_INTCombine(N, DCI, Subtarget);
17317 return performFP_TO_INT_SATCombine(N, DCI, Subtarget);
17318 case ISD::FCOPYSIGN: {
17319 EVT VT = N->getValueType(0);
17320 if (!VT.isVector())
17321 break;
17322 // There is a form of VFSGNJ which injects the negated sign of its second
17323 // operand. Try and bubble any FNEG up after the extend/round to produce
17324 // this optimized pattern. Avoid modifying cases where FP_ROUND and
17325 // TRUNC=1.
17326 SDValue In2 = N->getOperand(1);
17327 // Avoid cases where the extend/round has multiple uses, as duplicating
17328 // those is typically more expensive than removing a fneg.
17329 if (!In2.hasOneUse())
17330 break;
17331 if (In2.getOpcode() != ISD::FP_EXTEND &&
17332 (In2.getOpcode() != ISD::FP_ROUND || In2.getConstantOperandVal(1) != 0))
17333 break;
17334 In2 = In2.getOperand(0);
17335 if (In2.getOpcode() != ISD::FNEG)
17336 break;
17337 SDLoc DL(N);
17338 SDValue NewFPExtRound = DAG.getFPExtendOrRound(In2.getOperand(0), DL, VT);
17339 return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N->getOperand(0),
17340 DAG.getNode(ISD::FNEG, DL, VT, NewFPExtRound));
17341 }
17342 case ISD::MGATHER: {
17343 const auto *MGN = cast<MaskedGatherSDNode>(N);
17344 const EVT VT = N->getValueType(0);
17345 SDValue Index = MGN->getIndex();
17346 SDValue ScaleOp = MGN->getScale();
17347 ISD::MemIndexType IndexType = MGN->getIndexType();
17348 assert(!MGN->isIndexScaled() &&
17349 "Scaled gather/scatter should not be formed");
17350
17351 SDLoc DL(N);
17352 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
17353 return DAG.getMaskedGather(
17354 N->getVTList(), MGN->getMemoryVT(), DL,
17355 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
17356 MGN->getBasePtr(), Index, ScaleOp},
17357 MGN->getMemOperand(), IndexType, MGN->getExtensionType());
17358
17359 if (narrowIndex(Index, IndexType, DAG))
17360 return DAG.getMaskedGather(
17361 N->getVTList(), MGN->getMemoryVT(), DL,
17362 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
17363 MGN->getBasePtr(), Index, ScaleOp},
17364 MGN->getMemOperand(), IndexType, MGN->getExtensionType());
17365
17366 if (Index.getOpcode() == ISD::BUILD_VECTOR &&
17367 MGN->getExtensionType() == ISD::NON_EXTLOAD && isTypeLegal(VT)) {
17368 // The sequence will be XLenVT, not the type of Index. Tell
17369 // isSimpleVIDSequence this so we avoid overflow.
17370 if (std::optional<VIDSequence> SimpleVID =
17371 isSimpleVIDSequence(Index, Subtarget.getXLen());
17372 SimpleVID && SimpleVID->StepDenominator == 1) {
17373 const int64_t StepNumerator = SimpleVID->StepNumerator;
17374 const int64_t Addend = SimpleVID->Addend;
17375
17376 // Note: We don't need to check alignment here since (by assumption
17377 // from the existance of the gather), our offsets must be sufficiently
17378 // aligned.
17379
17380 const EVT PtrVT = getPointerTy(DAG.getDataLayout());
17381 assert(MGN->getBasePtr()->getValueType(0) == PtrVT);
17382 assert(IndexType == ISD::UNSIGNED_SCALED);
17383 SDValue BasePtr = DAG.getNode(ISD::ADD, DL, PtrVT, MGN->getBasePtr(),
17384 DAG.getSignedConstant(Addend, DL, PtrVT));
17385
17386 SDValue EVL = DAG.getElementCount(DL, Subtarget.getXLenVT(),
17388 SDValue StridedLoad = DAG.getStridedLoadVP(
17389 VT, DL, MGN->getChain(), BasePtr,
17390 DAG.getSignedConstant(StepNumerator, DL, XLenVT), MGN->getMask(),
17391 EVL, MGN->getMemOperand());
17392 SDValue VPSelect = DAG.getNode(ISD::VP_SELECT, DL, VT, MGN->getMask(),
17393 StridedLoad, MGN->getPassThru(), EVL);
17394 return DAG.getMergeValues({VPSelect, SDValue(StridedLoad.getNode(), 1)},
17395 DL);
17396 }
17397 }
17398
17399 SmallVector<int> ShuffleMask;
17400 if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
17401 matchIndexAsShuffle(VT, Index, MGN->getMask(), ShuffleMask)) {
17402 SDValue Load = DAG.getMaskedLoad(VT, DL, MGN->getChain(),
17403 MGN->getBasePtr(), DAG.getUNDEF(XLenVT),
17404 MGN->getMask(), DAG.getUNDEF(VT),
17405 MGN->getMemoryVT(), MGN->getMemOperand(),
17407 SDValue Shuffle =
17408 DAG.getVectorShuffle(VT, DL, Load, DAG.getUNDEF(VT), ShuffleMask);
17409 return DAG.getMergeValues({Shuffle, Load.getValue(1)}, DL);
17410 }
17411
17412 if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
17413 matchIndexAsWiderOp(VT, Index, MGN->getMask(),
17414 MGN->getMemOperand()->getBaseAlign(), Subtarget)) {
17415 SmallVector<SDValue> NewIndices;
17416 for (unsigned i = 0; i < Index->getNumOperands(); i += 2)
17417 NewIndices.push_back(Index.getOperand(i));
17418 EVT IndexVT = Index.getValueType()
17420 Index = DAG.getBuildVector(IndexVT, DL, NewIndices);
17421
17422 unsigned ElementSize = VT.getScalarStoreSize();
17423 EVT WideScalarVT = MVT::getIntegerVT(ElementSize * 8 * 2);
17424 auto EltCnt = VT.getVectorElementCount();
17425 assert(EltCnt.isKnownEven() && "Splitting vector, but not in half!");
17426 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), WideScalarVT,
17427 EltCnt.divideCoefficientBy(2));
17428 SDValue Passthru = DAG.getBitcast(WideVT, MGN->getPassThru());
17429 EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
17430 EltCnt.divideCoefficientBy(2));
17431 SDValue Mask = DAG.getSplat(MaskVT, DL, DAG.getConstant(1, DL, MVT::i1));
17432
17433 SDValue Gather =
17434 DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other), WideVT, DL,
17435 {MGN->getChain(), Passthru, Mask, MGN->getBasePtr(),
17436 Index, ScaleOp},
17437 MGN->getMemOperand(), IndexType, ISD::NON_EXTLOAD);
17438 SDValue Result = DAG.getBitcast(VT, Gather.getValue(0));
17439 return DAG.getMergeValues({Result, Gather.getValue(1)}, DL);
17440 }
17441 break;
17442 }
17443 case ISD::MSCATTER:{
17444 const auto *MSN = cast<MaskedScatterSDNode>(N);
17445 SDValue Index = MSN->getIndex();
17446 SDValue ScaleOp = MSN->getScale();
17447 ISD::MemIndexType IndexType = MSN->getIndexType();
17448 assert(!MSN->isIndexScaled() &&
17449 "Scaled gather/scatter should not be formed");
17450
17451 SDLoc DL(N);
17452 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
17453 return DAG.getMaskedScatter(
17454 N->getVTList(), MSN->getMemoryVT(), DL,
17455 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
17456 Index, ScaleOp},
17457 MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
17458
17459 if (narrowIndex(Index, IndexType, DAG))
17460 return DAG.getMaskedScatter(
17461 N->getVTList(), MSN->getMemoryVT(), DL,
17462 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
17463 Index, ScaleOp},
17464 MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
17465
17466 EVT VT = MSN->getValue()->getValueType(0);
17467 SmallVector<int> ShuffleMask;
17468 if (!MSN->isTruncatingStore() &&
17469 matchIndexAsShuffle(VT, Index, MSN->getMask(), ShuffleMask)) {
17470 SDValue Shuffle = DAG.getVectorShuffle(VT, DL, MSN->getValue(),
17471 DAG.getUNDEF(VT), ShuffleMask);
17472 return DAG.getMaskedStore(MSN->getChain(), DL, Shuffle, MSN->getBasePtr(),
17473 DAG.getUNDEF(XLenVT), MSN->getMask(),
17474 MSN->getMemoryVT(), MSN->getMemOperand(),
17475 ISD::UNINDEXED, false);
17476 }
17477 break;
17478 }
17479 case ISD::VP_GATHER: {
17480 const auto *VPGN = cast<VPGatherSDNode>(N);
17481 SDValue Index = VPGN->getIndex();
17482 SDValue ScaleOp = VPGN->getScale();
17483 ISD::MemIndexType IndexType = VPGN->getIndexType();
17484 assert(!VPGN->isIndexScaled() &&
17485 "Scaled gather/scatter should not be formed");
17486
17487 SDLoc DL(N);
17488 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
17489 return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
17490 {VPGN->getChain(), VPGN->getBasePtr(), Index,
17491 ScaleOp, VPGN->getMask(),
17492 VPGN->getVectorLength()},
17493 VPGN->getMemOperand(), IndexType);
17494
17495 if (narrowIndex(Index, IndexType, DAG))
17496 return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
17497 {VPGN->getChain(), VPGN->getBasePtr(), Index,
17498 ScaleOp, VPGN->getMask(),
17499 VPGN->getVectorLength()},
17500 VPGN->getMemOperand(), IndexType);
17501
17502 break;
17503 }
17504 case ISD::VP_SCATTER: {
17505 const auto *VPSN = cast<VPScatterSDNode>(N);
17506 SDValue Index = VPSN->getIndex();
17507 SDValue ScaleOp = VPSN->getScale();
17508 ISD::MemIndexType IndexType = VPSN->getIndexType();
17509 assert(!VPSN->isIndexScaled() &&
17510 "Scaled gather/scatter should not be formed");
17511
17512 SDLoc DL(N);
17513 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
17514 return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
17515 {VPSN->getChain(), VPSN->getValue(),
17516 VPSN->getBasePtr(), Index, ScaleOp,
17517 VPSN->getMask(), VPSN->getVectorLength()},
17518 VPSN->getMemOperand(), IndexType);
17519
17520 if (narrowIndex(Index, IndexType, DAG))
17521 return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
17522 {VPSN->getChain(), VPSN->getValue(),
17523 VPSN->getBasePtr(), Index, ScaleOp,
17524 VPSN->getMask(), VPSN->getVectorLength()},
17525 VPSN->getMemOperand(), IndexType);
17526 break;
17527 }
17528 case RISCVISD::SHL_VL:
17529 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
17530 return V;
17531 [[fallthrough]];
17532 case RISCVISD::SRA_VL:
17533 case RISCVISD::SRL_VL: {
17534 SDValue ShAmt = N->getOperand(1);
17536 // We don't need the upper 32 bits of a 64-bit element for a shift amount.
17537 SDLoc DL(N);
17538 SDValue VL = N->getOperand(4);
17539 EVT VT = N->getValueType(0);
17540 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
17541 ShAmt.getOperand(1), VL);
17542 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt,
17543 N->getOperand(2), N->getOperand(3), N->getOperand(4));
17544 }
17545 break;
17546 }
17547 case ISD::SRA:
17548 if (SDValue V = performSRACombine(N, DAG, Subtarget))
17549 return V;
17550 [[fallthrough]];
17551 case ISD::SRL:
17552 case ISD::SHL: {
17553 if (N->getOpcode() == ISD::SHL) {
17554 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
17555 return V;
17556 }
17557 SDValue ShAmt = N->getOperand(1);
17559 // We don't need the upper 32 bits of a 64-bit element for a shift amount.
17560 SDLoc DL(N);
17561 EVT VT = N->getValueType(0);
17562 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
17563 ShAmt.getOperand(1),
17564 DAG.getRegister(RISCV::X0, Subtarget.getXLenVT()));
17565 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt);
17566 }
17567 break;
17568 }
17569 case RISCVISD::ADD_VL:
17570 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
17571 return V;
17572 return combineToVWMACC(N, DAG, Subtarget);
17577 return performVWADDSUBW_VLCombine(N, DCI, Subtarget);
17578 case RISCVISD::SUB_VL:
17579 case RISCVISD::MUL_VL:
17580 return combineOp_VLToVWOp_VL(N, DCI, Subtarget);
17589 return performVFMADD_VLCombine(N, DCI, Subtarget);
17590 case RISCVISD::FADD_VL:
17591 case RISCVISD::FSUB_VL:
17592 case RISCVISD::FMUL_VL:
17595 return combineOp_VLToVWOp_VL(N, DCI, Subtarget);
17596 case ISD::LOAD:
17597 case ISD::STORE: {
17598 if (DCI.isAfterLegalizeDAG())
17599 if (SDValue V = performMemPairCombine(N, DCI))
17600 return V;
17601
17602 if (N->getOpcode() != ISD::STORE)
17603 break;
17604
17605 auto *Store = cast<StoreSDNode>(N);
17606 SDValue Chain = Store->getChain();
17607 EVT MemVT = Store->getMemoryVT();
17608 SDValue Val = Store->getValue();
17609 SDLoc DL(N);
17610
17611 bool IsScalarizable =
17612 MemVT.isFixedLengthVector() && ISD::isNormalStore(Store) &&
17613 Store->isSimple() &&
17614 MemVT.getVectorElementType().bitsLE(Subtarget.getXLenVT()) &&
17615 isPowerOf2_64(MemVT.getSizeInBits()) &&
17616 MemVT.getSizeInBits() <= Subtarget.getXLen();
17617
17618 // If sufficiently aligned we can scalarize stores of constant vectors of
17619 // any power-of-two size up to XLen bits, provided that they aren't too
17620 // expensive to materialize.
17621 // vsetivli zero, 2, e8, m1, ta, ma
17622 // vmv.v.i v8, 4
17623 // vse64.v v8, (a0)
17624 // ->
17625 // li a1, 1028
17626 // sh a1, 0(a0)
17627 if (DCI.isBeforeLegalize() && IsScalarizable &&
17629 // Get the constant vector bits
17630 APInt NewC(Val.getValueSizeInBits(), 0);
17631 uint64_t EltSize = Val.getScalarValueSizeInBits();
17632 for (unsigned i = 0; i < Val.getNumOperands(); i++) {
17633 if (Val.getOperand(i).isUndef())
17634 continue;
17635 NewC.insertBits(Val.getConstantOperandAPInt(i).trunc(EltSize),
17636 i * EltSize);
17637 }
17638 MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
17639
17640 if (RISCVMatInt::getIntMatCost(NewC, Subtarget.getXLen(), Subtarget,
17641 true) <= 2 &&
17643 NewVT, *Store->getMemOperand())) {
17644 SDValue NewV = DAG.getConstant(NewC, DL, NewVT);
17645 return DAG.getStore(Chain, DL, NewV, Store->getBasePtr(),
17646 Store->getPointerInfo(), Store->getOriginalAlign(),
17647 Store->getMemOperand()->getFlags());
17648 }
17649 }
17650
17651 // Similarly, if sufficiently aligned we can scalarize vector copies, e.g.
17652 // vsetivli zero, 2, e16, m1, ta, ma
17653 // vle16.v v8, (a0)
17654 // vse16.v v8, (a1)
17655 if (auto *L = dyn_cast<LoadSDNode>(Val);
17656 L && DCI.isBeforeLegalize() && IsScalarizable && L->isSimple() &&
17657 L->hasNUsesOfValue(1, 0) && L->hasNUsesOfValue(1, 1) &&
17658 Store->getChain() == SDValue(L, 1) && ISD::isNormalLoad(L) &&
17659 L->getMemoryVT() == MemVT) {
17660 MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
17662 NewVT, *Store->getMemOperand()) &&
17664 NewVT, *L->getMemOperand())) {
17665 SDValue NewL = DAG.getLoad(NewVT, DL, L->getChain(), L->getBasePtr(),
17666 L->getPointerInfo(), L->getOriginalAlign(),
17667 L->getMemOperand()->getFlags());
17668 return DAG.getStore(Chain, DL, NewL, Store->getBasePtr(),
17669 Store->getPointerInfo(), Store->getOriginalAlign(),
17670 Store->getMemOperand()->getFlags());
17671 }
17672 }
17673
17674 // Combine store of vmv.x.s/vfmv.f.s to vse with VL of 1.
17675 // vfmv.f.s is represented as extract element from 0. Match it late to avoid
17676 // any illegal types.
17677 if (Val.getOpcode() == RISCVISD::VMV_X_S ||
17678 (DCI.isAfterLegalizeDAG() &&
17680 isNullConstant(Val.getOperand(1)))) {
17681 SDValue Src = Val.getOperand(0);
17682 MVT VecVT = Src.getSimpleValueType();
17683 // VecVT should be scalable and memory VT should match the element type.
17684 if (!Store->isIndexed() && VecVT.isScalableVector() &&
17685 MemVT == VecVT.getVectorElementType()) {
17686 SDLoc DL(N);
17687 MVT MaskVT = getMaskTypeFor(VecVT);
17688 return DAG.getStoreVP(
17689 Store->getChain(), DL, Src, Store->getBasePtr(), Store->getOffset(),
17690 DAG.getConstant(1, DL, MaskVT),
17691 DAG.getConstant(1, DL, Subtarget.getXLenVT()), MemVT,
17692 Store->getMemOperand(), Store->getAddressingMode(),
17693 Store->isTruncatingStore(), /*IsCompress*/ false);
17694 }
17695 }
17696
17697 break;
17698 }
17699 case ISD::SPLAT_VECTOR: {
17700 EVT VT = N->getValueType(0);
17701 // Only perform this combine on legal MVT types.
17702 if (!isTypeLegal(VT))
17703 break;
17704 if (auto Gather = matchSplatAsGather(N->getOperand(0), VT.getSimpleVT(), N,
17705 DAG, Subtarget))
17706 return Gather;
17707 break;
17708 }
17709 case ISD::BUILD_VECTOR:
17710 if (SDValue V = performBUILD_VECTORCombine(N, DAG, Subtarget, *this))
17711 return V;
17712 break;
17714 if (SDValue V = performCONCAT_VECTORSCombine(N, DAG, Subtarget, *this))
17715 return V;
17716 break;
17718 if (SDValue V = performINSERT_VECTOR_ELTCombine(N, DAG, Subtarget, *this))
17719 return V;
17720 break;
17721 case RISCVISD::VFMV_V_F_VL: {
17722 const MVT VT = N->getSimpleValueType(0);
17723 SDValue Passthru = N->getOperand(0);
17724 SDValue Scalar = N->getOperand(1);
17725 SDValue VL = N->getOperand(2);
17726
17727 // If VL is 1, we can use vfmv.s.f.
17728 if (isOneConstant(VL))
17729 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, Passthru, Scalar, VL);
17730 break;
17731 }
17732 case RISCVISD::VMV_V_X_VL: {
17733 const MVT VT = N->getSimpleValueType(0);
17734 SDValue Passthru = N->getOperand(0);
17735 SDValue Scalar = N->getOperand(1);
17736 SDValue VL = N->getOperand(2);
17737
17738 // Tail agnostic VMV.V.X only demands the vector element bitwidth from the
17739 // scalar input.
17740 unsigned ScalarSize = Scalar.getValueSizeInBits();
17741 unsigned EltWidth = VT.getScalarSizeInBits();
17742 if (ScalarSize > EltWidth && Passthru.isUndef())
17743 if (SimplifyDemandedLowBitsHelper(1, EltWidth))
17744 return SDValue(N, 0);
17745
17746 // If VL is 1 and the scalar value won't benefit from immediate, we can
17747 // use vmv.s.x.
17749 if (isOneConstant(VL) &&
17750 (!Const || Const->isZero() ||
17751 !Const->getAPIntValue().sextOrTrunc(EltWidth).isSignedIntN(5)))
17752 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru, Scalar, VL);
17753
17754 break;
17755 }
17756 case RISCVISD::VFMV_S_F_VL: {
17757 SDValue Src = N->getOperand(1);
17758 // Try to remove vector->scalar->vector if the scalar->vector is inserting
17759 // into an undef vector.
17760 // TODO: Could use a vslide or vmv.v.v for non-undef.
17761 if (N->getOperand(0).isUndef() &&
17762 Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
17763 isNullConstant(Src.getOperand(1)) &&
17764 Src.getOperand(0).getValueType().isScalableVector()) {
17765 EVT VT = N->getValueType(0);
17766 EVT SrcVT = Src.getOperand(0).getValueType();
17768 // Widths match, just return the original vector.
17769 if (SrcVT == VT)
17770 return Src.getOperand(0);
17771 // TODO: Use insert_subvector/extract_subvector to change widen/narrow?
17772 }
17773 [[fallthrough]];
17774 }
17775 case RISCVISD::VMV_S_X_VL: {
17776 const MVT VT = N->getSimpleValueType(0);
17777 SDValue Passthru = N->getOperand(0);
17778 SDValue Scalar = N->getOperand(1);
17779 SDValue VL = N->getOperand(2);
17780
17781 if (Scalar.getOpcode() == RISCVISD::VMV_X_S && Passthru.isUndef() &&
17782 Scalar.getOperand(0).getValueType() == N->getValueType(0))
17783 return Scalar.getOperand(0);
17784
17785 // Use M1 or smaller to avoid over constraining register allocation
17786 const MVT M1VT = getLMUL1VT(VT);
17787 if (M1VT.bitsLT(VT)) {
17788 SDValue M1Passthru =
17789 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Passthru,
17790 DAG.getVectorIdxConstant(0, DL));
17791 SDValue Result =
17792 DAG.getNode(N->getOpcode(), DL, M1VT, M1Passthru, Scalar, VL);
17793 Result = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Passthru, Result,
17794 DAG.getVectorIdxConstant(0, DL));
17795 return Result;
17796 }
17797
17798 // We use a vmv.v.i if possible. We limit this to LMUL1. LMUL2 or
17799 // higher would involve overly constraining the register allocator for
17800 // no purpose.
17801 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
17802 Const && !Const->isZero() && isInt<5>(Const->getSExtValue()) &&
17803 VT.bitsLE(getLMUL1VT(VT)) && Passthru.isUndef())
17804 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
17805
17806 break;
17807 }
17808 case RISCVISD::VMV_X_S: {
17809 SDValue Vec = N->getOperand(0);
17810 MVT VecVT = N->getOperand(0).getSimpleValueType();
17811 const MVT M1VT = getLMUL1VT(VecVT);
17812 if (M1VT.bitsLT(VecVT)) {
17813 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Vec,
17814 DAG.getVectorIdxConstant(0, DL));
17815 return DAG.getNode(RISCVISD::VMV_X_S, DL, N->getSimpleValueType(0), Vec);
17816 }
17817 break;
17818 }
17822 unsigned IntOpNo = N->getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1;
17823 unsigned IntNo = N->getConstantOperandVal(IntOpNo);
17824 switch (IntNo) {
17825 // By default we do not combine any intrinsic.
17826 default:
17827 return SDValue();
17828 case Intrinsic::riscv_vcpop:
17829 case Intrinsic::riscv_vcpop_mask:
17830 case Intrinsic::riscv_vfirst:
17831 case Intrinsic::riscv_vfirst_mask: {
17832 SDValue VL = N->getOperand(2);
17833 if (IntNo == Intrinsic::riscv_vcpop_mask ||
17834 IntNo == Intrinsic::riscv_vfirst_mask)
17835 VL = N->getOperand(3);
17836 if (!isNullConstant(VL))
17837 return SDValue();
17838 // If VL is 0, vcpop -> li 0, vfirst -> li -1.
17839 SDLoc DL(N);
17840 EVT VT = N->getValueType(0);
17841 if (IntNo == Intrinsic::riscv_vfirst ||
17842 IntNo == Intrinsic::riscv_vfirst_mask)
17843 return DAG.getAllOnesConstant(DL, VT);
17844 return DAG.getConstant(0, DL, VT);
17845 }
17846 }
17847 }
17848 case ISD::BITCAST: {
17850 SDValue N0 = N->getOperand(0);
17851 EVT VT = N->getValueType(0);
17852 EVT SrcVT = N0.getValueType();
17853 // If this is a bitcast between a MVT::v4i1/v2i1/v1i1 and an illegal integer
17854 // type, widen both sides to avoid a trip through memory.
17855 if ((SrcVT == MVT::v1i1 || SrcVT == MVT::v2i1 || SrcVT == MVT::v4i1) &&
17856 VT.isScalarInteger()) {
17857 unsigned NumConcats = 8 / SrcVT.getVectorNumElements();
17858 SmallVector<SDValue, 4> Ops(NumConcats, DAG.getUNDEF(SrcVT));
17859 Ops[0] = N0;
17860 SDLoc DL(N);
17861 N0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i1, Ops);
17862 N0 = DAG.getBitcast(MVT::i8, N0);
17863 return DAG.getNode(ISD::TRUNCATE, DL, VT, N0);
17864 }
17865
17866 return SDValue();
17867 }
17868 }
17869
17870 return SDValue();
17871}
17872
17874 EVT XVT, unsigned KeptBits) const {
17875 // For vectors, we don't have a preference..
17876 if (XVT.isVector())
17877 return false;
17878
17879 if (XVT != MVT::i32 && XVT != MVT::i64)
17880 return false;
17881
17882 // We can use sext.w for RV64 or an srai 31 on RV32.
17883 if (KeptBits == 32 || KeptBits == 64)
17884 return true;
17885
17886 // With Zbb we can use sext.h/sext.b.
17887 return Subtarget.hasStdExtZbb() &&
17888 ((KeptBits == 8 && XVT == MVT::i64 && !Subtarget.is64Bit()) ||
17889 KeptBits == 16);
17890}
17891
17893 const SDNode *N, CombineLevel Level) const {
17894 assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
17895 N->getOpcode() == ISD::SRL) &&
17896 "Expected shift op");
17897
17898 // The following folds are only desirable if `(OP _, c1 << c2)` can be
17899 // materialised in fewer instructions than `(OP _, c1)`:
17900 //
17901 // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
17902 // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
17903 SDValue N0 = N->getOperand(0);
17904 EVT Ty = N0.getValueType();
17905 if (Ty.isScalarInteger() &&
17906 (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) {
17907 auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1));
17908 auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
17909 if (C1 && C2) {
17910 const APInt &C1Int = C1->getAPIntValue();
17911 APInt ShiftedC1Int = C1Int << C2->getAPIntValue();
17912
17913 // We can materialise `c1 << c2` into an add immediate, so it's "free",
17914 // and the combine should happen, to potentially allow further combines
17915 // later.
17916 if (ShiftedC1Int.getSignificantBits() <= 64 &&
17917 isLegalAddImmediate(ShiftedC1Int.getSExtValue()))
17918 return true;
17919
17920 // We can materialise `c1` in an add immediate, so it's "free", and the
17921 // combine should be prevented.
17922 if (C1Int.getSignificantBits() <= 64 &&
17924 return false;
17925
17926 // Neither constant will fit into an immediate, so find materialisation
17927 // costs.
17928 int C1Cost =
17929 RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(), Subtarget,
17930 /*CompressionCost*/ true);
17931 int ShiftedC1Cost = RISCVMatInt::getIntMatCost(
17932 ShiftedC1Int, Ty.getSizeInBits(), Subtarget,
17933 /*CompressionCost*/ true);
17934
17935 // Materialising `c1` is cheaper than materialising `c1 << c2`, so the
17936 // combine should be prevented.
17937 if (C1Cost < ShiftedC1Cost)
17938 return false;
17939 }
17940 }
17941 return true;
17942}
17943
17945 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
17946 TargetLoweringOpt &TLO) const {
17947 // Delay this optimization as late as possible.
17948 if (!TLO.LegalOps)
17949 return false;
17950
17951 EVT VT = Op.getValueType();
17952 if (VT.isVector())
17953 return false;
17954
17955 unsigned Opcode = Op.getOpcode();
17956 if (Opcode != ISD::AND && Opcode != ISD::OR && Opcode != ISD::XOR)
17957 return false;
17958
17959 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
17960 if (!C)
17961 return false;
17962
17963 const APInt &Mask = C->getAPIntValue();
17964
17965 // Clear all non-demanded bits initially.
17966 APInt ShrunkMask = Mask & DemandedBits;
17967
17968 // Try to make a smaller immediate by setting undemanded bits.
17969
17970 APInt ExpandedMask = Mask | ~DemandedBits;
17971
17972 auto IsLegalMask = [ShrunkMask, ExpandedMask](const APInt &Mask) -> bool {
17973 return ShrunkMask.isSubsetOf(Mask) && Mask.isSubsetOf(ExpandedMask);
17974 };
17975 auto UseMask = [Mask, Op, &TLO](const APInt &NewMask) -> bool {
17976 if (NewMask == Mask)
17977 return true;
17978 SDLoc DL(Op);
17979 SDValue NewC = TLO.DAG.getConstant(NewMask, DL, Op.getValueType());
17980 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
17981 Op.getOperand(0), NewC);
17982 return TLO.CombineTo(Op, NewOp);
17983 };
17984
17985 // If the shrunk mask fits in sign extended 12 bits, let the target
17986 // independent code apply it.
17987 if (ShrunkMask.isSignedIntN(12))
17988 return false;
17989
17990 // And has a few special cases for zext.
17991 if (Opcode == ISD::AND) {
17992 // Preserve (and X, 0xffff), if zext.h exists use zext.h,
17993 // otherwise use SLLI + SRLI.
17994 APInt NewMask = APInt(Mask.getBitWidth(), 0xffff);
17995 if (IsLegalMask(NewMask))
17996 return UseMask(NewMask);
17997
17998 // Try to preserve (and X, 0xffffffff), the (zext_inreg X, i32) pattern.
17999 if (VT == MVT::i64) {
18000 APInt NewMask = APInt(64, 0xffffffff);
18001 if (IsLegalMask(NewMask))
18002 return UseMask(NewMask);
18003 }
18004 }
18005
18006 // For the remaining optimizations, we need to be able to make a negative
18007 // number through a combination of mask and undemanded bits.
18008 if (!ExpandedMask.isNegative())
18009 return false;
18010
18011 // What is the fewest number of bits we need to represent the negative number.
18012 unsigned MinSignedBits = ExpandedMask.getSignificantBits();
18013
18014 // Try to make a 12 bit negative immediate. If that fails try to make a 32
18015 // bit negative immediate unless the shrunk immediate already fits in 32 bits.
18016 // If we can't create a simm12, we shouldn't change opaque constants.
18017 APInt NewMask = ShrunkMask;
18018 if (MinSignedBits <= 12)
18019 NewMask.setBitsFrom(11);
18020 else if (!C->isOpaque() && MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32))
18021 NewMask.setBitsFrom(31);
18022 else
18023 return false;
18024
18025 // Check that our new mask is a subset of the demanded mask.
18026 assert(IsLegalMask(NewMask));
18027 return UseMask(NewMask);
18028}
18029
// Constant-fold a GREV (generalized bit-reverse) or GORC (generalized
// OR-combine) permutation of `x`. Each set bit in `ShAmt` enables one
// butterfly stage operating on groups of 1, 2, 4, 8, 16, or 32 bits: GREV
// swaps the paired groups, while GORC ORs the swapped result back into the
// original value. With ShAmt == 7 this matches brev8 / orc.b.
static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC) {
  static const uint64_t GREVMasks[] = {
      0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
      0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};

  unsigned Stage = 0;
  for (uint64_t GroupMask : GREVMasks) {
    unsigned Shift = 1u << Stage++;
    // A clear bit in ShAmt means this stage is a no-op.
    if ((ShAmt & Shift) == 0)
      continue;
    uint64_t Swapped =
        ((x & GroupMask) << Shift) | ((x >> Shift) & GroupMask);
    x = IsGORC ? (x | Swapped) : Swapped;
  }

  return x;
}
18048
// Compute known-zero/known-one bits for RISC-V-specific SDNodes so generic
// DAG combines can reason about target nodes.
// NOTE(review): the opening line of this definition (the
// `void RISCVTargetLowering::computeKnownBitsForTargetNode(SDValue Op,`
// signature) was lost in extraction; only the parameter tail is visible.
                                       KnownBits &Known,
                                       const APInt &DemandedElts,
                                       const SelectionDAG &DAG,
                                       unsigned Depth) const {
  unsigned BitWidth = Known.getBitWidth();
  unsigned Opc = Op.getOpcode();
  // This hook must only be reached for target opcodes or target intrinsics.
  assert((Opc >= ISD::BUILTIN_OP_END ||
          Opc == ISD::INTRINSIC_WO_CHAIN ||
          Opc == ISD::INTRINSIC_W_CHAIN ||
          Opc == ISD::INTRINSIC_VOID) &&
         "Should use MaskedValueIsZero if you don't know whether Op"
         " is a target node!");

  Known.resetAll();
  switch (Opc) {
  default: break;
  case RISCVISD::SELECT_CC: {
    Known = DAG.computeKnownBits(Op.getOperand(4), Depth + 1);
    // If we don't know any bits, early out.
    if (Known.isUnknown())
      break;
    KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(3), Depth + 1);

    // Only known if known in both the LHS and RHS.
    Known = Known.intersectWith(Known2);
    break;
  }
  // NOTE(review): case label(s) lost in extraction here (presumably the
  // conditional-zero opcodes RISCVISD::CZERO_EQZ/CZERO_NEZ, given the
  // "all zero or operand 0" comment below) — restore before compiling.
    Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
    // Result is either all zero or operand 0. We can propagate zeros, but not
    // ones.
    Known.One.clearAllBits();
    break;
  case RISCVISD::REMUW: {
    KnownBits Known2;
    Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
    // We only care about the lower 32 bits.
    Known = KnownBits::urem(Known.trunc(32), Known2.trunc(32));
    // Restore the original width by sign extending.
    Known = Known.sext(BitWidth);
    break;
  }
  case RISCVISD::DIVUW: {
    KnownBits Known2;
    Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
    // We only care about the lower 32 bits.
    Known = KnownBits::udiv(Known.trunc(32), Known2.trunc(32));
    // Restore the original width by sign extending.
    Known = Known.sext(BitWidth);
    break;
  }
  case RISCVISD::SLLW: {
    KnownBits Known2;
    Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
    // Only the low 5 bits of the shift amount matter for a 32-bit word shift.
    Known = KnownBits::shl(Known.trunc(32), Known2.trunc(5).zext(32));
    // Restore the original width by sign extending.
    Known = Known.sext(BitWidth);
    break;
  }
  case RISCVISD::CTZW: {
    KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
    unsigned PossibleTZ = Known2.trunc(32).countMaxTrailingZeros();
    // The count fits in bit_width(max possible count) bits; all higher result
    // bits are zero.
    unsigned LowBits = llvm::bit_width(PossibleTZ);
    Known.Zero.setBitsFrom(LowBits);
    break;
  }
  case RISCVISD::CLZW: {
    KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
    unsigned PossibleLZ = Known2.trunc(32).countMaxLeadingZeros();
    unsigned LowBits = llvm::bit_width(PossibleLZ);
    Known.Zero.setBitsFrom(LowBits);
    break;
  }
  case RISCVISD::BREV8:
  case RISCVISD::ORC_B: {
    // FIXME: This is based on the non-ratified Zbp GREV and GORC where a
    // control value of 7 is equivalent to brev8 and orc.b.
    Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
    bool IsGORC = Op.getOpcode() == RISCVISD::ORC_B;
    // To compute zeros, we need to invert the value and invert it back after.
    Known.Zero =
        ~computeGREVOrGORC(~Known.Zero.getZExtValue(), 7, IsGORC);
    Known.One = computeGREVOrGORC(Known.One.getZExtValue(), 7, IsGORC);
    break;
  }
  case RISCVISD::READ_VLENB: {
    // We can use the minimum and maximum VLEN values to bound VLENB. We
    // know VLEN must be a power of two.
    const unsigned MinVLenB = Subtarget.getRealMinVLen() / 8;
    const unsigned MaxVLenB = Subtarget.getRealMaxVLen() / 8;
    assert(MinVLenB > 0 && "READ_VLENB without vector extension enabled?");
    Known.Zero.setLowBits(Log2_32(MinVLenB));
    Known.Zero.setBitsFrom(Log2_32(MaxVLenB)+1);
    // If the range is a single power of two, the value is fully known.
    if (MaxVLenB == MinVLenB)
      Known.One.setBit(Log2_32(MinVLenB));
    break;
  }
  case RISCVISD::FCLASS: {
    // fclass will only set one of the low 10 bits.
    Known.Zero.setBitsFrom(10);
    break;
  }
  // NOTE(review): case label(s) lost in extraction here (presumably
  // ISD::INTRINSIC_WO_CHAIN / ISD::INTRINSIC_W_CHAIN — the ?: below selects
  // the intrinsic-id operand index based on exactly those opcodes).
    unsigned IntNo =
        Op.getConstantOperandVal(Opc == ISD::INTRINSIC_WO_CHAIN ? 0 : 1);
    switch (IntNo) {
    default:
      // We can't do anything for most intrinsics.
      break;
    case Intrinsic::riscv_vsetvli:
    case Intrinsic::riscv_vsetvlimax: {
      bool HasAVL = IntNo == Intrinsic::riscv_vsetvli;
      unsigned VSEW = Op.getConstantOperandVal(HasAVL + 1);
      RISCVII::VLMUL VLMUL =
          static_cast<RISCVII::VLMUL>(Op.getConstantOperandVal(HasAVL + 2));
      unsigned SEW = RISCVVType::decodeVSEW(VSEW);
      auto [LMul, Fractional] = RISCVVType::decodeVLMUL(VLMUL);
      // Bound the result by VLMAX = VLEN / SEW scaled by LMUL.
      uint64_t MaxVL = Subtarget.getRealMaxVLen() / SEW;
      MaxVL = (Fractional) ? MaxVL / LMul : MaxVL * LMul;

      // Result of vsetvli must be not larger than AVL.
      if (HasAVL && isa<ConstantSDNode>(Op.getOperand(1)))
        MaxVL = std::min(MaxVL, Op.getConstantOperandVal(1));

      unsigned KnownZeroFirstBit = Log2_32(MaxVL) + 1;
      if (BitWidth > KnownZeroFirstBit)
        Known.Zero.setBitsFrom(KnownZeroFirstBit);
      break;
    }
    }
    break;
  }
  }
}
18189
// Report a lower bound on the number of sign bits of a RISC-V-specific
// SDNode's result; returning 1 means "nothing known beyond the default".
// NOTE(review): the opening line of this definition (the
// `unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(` signature)
// was lost in extraction; only the parameter tail is visible.
    SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
    unsigned Depth) const {
  switch (Op.getOpcode()) {
  default:
    break;
  case RISCVISD::SELECT_CC: {
    // The select result is as sign-extended as the less-extended of the two
    // selected values.
    unsigned Tmp =
        DAG.ComputeNumSignBits(Op.getOperand(3), DemandedElts, Depth + 1);
    if (Tmp == 1) return 1;  // Early out.
    unsigned Tmp2 =
        DAG.ComputeNumSignBits(Op.getOperand(4), DemandedElts, Depth + 1);
    return std::min(Tmp, Tmp2);
  }
  // NOTE(review): case label(s) lost in extraction here (presumably the
  // conditional-zero opcodes RISCVISD::CZERO_EQZ/CZERO_NEZ, given the
  // comment below) — restore before compiling.
    // Output is either all zero or operand 0. We can propagate sign bit count
    // from operand 0.
    return DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
  case RISCVISD::ABSW: {
    // We expand this at isel to negw+max. The result will have 33 sign bits
    // if the input has at least 33 sign bits.
    unsigned Tmp =
        DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
    if (Tmp < 33) return 1;
    return 33;
  }
  case RISCVISD::SLLW:
  case RISCVISD::SRAW:
  case RISCVISD::SRLW:
  case RISCVISD::DIVW:
  case RISCVISD::DIVUW:
  case RISCVISD::REMUW:
  case RISCVISD::ROLW:
  case RISCVISD::RORW:
  // NOTE(review): additional case labels lost in extraction here (the
  // original had further *W-style sign-extending opcodes falling through to
  // the same `return 33`) — restore before compiling.
    // TODO: As the result is sign-extended, this is conservatively correct. A
    // more precise answer could be calculated for SRAW depending on known
    // bits in the shift amount.
    return 33;
  case RISCVISD::VMV_X_S: {
    // The number of sign bits of the scalar result is computed by obtaining the
    // element type of the input vector operand, subtracting its width from the
    // XLEN, and then adding one (sign bit within the element type). If the
    // element type is wider than XLen, the least-significant XLEN bits are
    // taken.
    unsigned XLen = Subtarget.getXLen();
    unsigned EltBits = Op.getOperand(0).getScalarValueSizeInBits();
    if (EltBits <= XLen)
      return XLen - EltBits + 1;
    break;
  }
  // NOTE(review): case label lost in extraction here (presumably
  // `case ISD::INTRINSIC_W_CHAIN: {` — the intrinsic id is read from
  // operand 1, which matches a chained intrinsic node).
    unsigned IntNo = Op.getConstantOperandVal(1);
    switch (IntNo) {
    default:
      break;
    case Intrinsic::riscv_masked_atomicrmw_xchg_i64:
    case Intrinsic::riscv_masked_atomicrmw_add_i64:
    case Intrinsic::riscv_masked_atomicrmw_sub_i64:
    case Intrinsic::riscv_masked_atomicrmw_nand_i64:
    case Intrinsic::riscv_masked_atomicrmw_max_i64:
    case Intrinsic::riscv_masked_atomicrmw_min_i64:
    case Intrinsic::riscv_masked_atomicrmw_umax_i64:
    case Intrinsic::riscv_masked_atomicrmw_umin_i64:
    case Intrinsic::riscv_masked_cmpxchg_i64:
      // riscv_masked_{atomicrmw_*,cmpxchg} intrinsics represent an emulated
      // narrow atomic operation. These are implemented using atomic
      // operations at the minimum supported atomicrmw/cmpxchg width whose
      // result is then sign extended to XLEN. With +A, the minimum width is
      // 32 for both 64 and 32.
      assert(Subtarget.getXLen() == 64);
      // NOTE(review): one line appears to be missing between these asserts
      // (extraction artifact) — verify against upstream.
      assert(Subtarget.hasStdExtA());
      return 33;
    }
    break;
  }
  }

  // Unknown target node: claim only the trivial single sign bit.
  return 1;
}
18275
    SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
    bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {

  // TODO: Add more target nodes.
  switch (Op.getOpcode()) {
    // Integer select_cc cannot create poison.
    // TODO: What are the FP poison semantics?
    // TODO: This instruction blocks poison from the unselected operand, can
    // we do anything with that?
    // Conservatively report "may create poison" for non-integer results.
    return !Op.getValueType().isInteger();
  }
  // Fall back to the generic TargetLowering implementation for all other
  // target nodes.
      Op, DemandedElts, DAG, PoisonOnly, ConsiderFlags, Depth);
}
18292
// Recognize a load whose address is a zero-offset constant-pool reference
// (either a single LLA, or a HI/ADD_LO pair with matching %hi/%lo flags) and
// return the Constant being loaded; return nullptr otherwise.
const Constant *
  assert(Ld && "Unexpected null LoadSDNode");
  // Only plain (non-extending, non-indexed) loads are supported.
  if (!ISD::isNormalLoad(Ld))
    return nullptr;

  SDValue Ptr = Ld->getBasePtr();

  // Only constant pools with no offset are supported.
  auto GetSupportedConstantPool = [](SDValue Ptr) -> ConstantPoolSDNode * {
    auto *CNode = dyn_cast<ConstantPoolSDNode>(Ptr);
    if (!CNode || CNode->isMachineConstantPoolEntry() ||
        CNode->getOffset() != 0)
      return nullptr;

    return CNode;
  };

  // Simple case, LLA.
  if (Ptr.getOpcode() == RISCVISD::LLA) {
    auto *CNode = GetSupportedConstantPool(Ptr);
    if (!CNode || CNode->getTargetFlags() != 0)
      return nullptr;

    return CNode->getConstVal();
  }

  // Look for a HI and ADD_LO pair.
  if (Ptr.getOpcode() != RISCVISD::ADD_LO ||
      Ptr.getOperand(0).getOpcode() != RISCVISD::HI)
    return nullptr;

  auto *CNodeLo = GetSupportedConstantPool(Ptr.getOperand(1));
  auto *CNodeHi = GetSupportedConstantPool(Ptr.getOperand(0).getOperand(0));

  // The low part must carry %lo and the high part %hi relocation flags.
  if (!CNodeLo || CNodeLo->getTargetFlags() != RISCVII::MO_LO ||
      !CNodeHi || CNodeHi->getTargetFlags() != RISCVII::MO_HI)
    return nullptr;

  // Both halves must refer to the same underlying constant.
  if (CNodeLo->getConstVal() != CNodeHi->getConstVal())
    return nullptr;

  return CNodeLo->getConstVal();
}
18337
                                             MachineBasicBlock *BB) {
  assert(MI.getOpcode() == RISCV::ReadCounterWide && "Unexpected instruction");

  // To read a 64-bit counter CSR on a 32-bit target, we read the two halves.
  // Should the count have wrapped while it was being read, we need to try
  // again.
  // For example:
  // ```
  // read:
  // csrrs x3, counterh # load high word of counter
  // csrrs x2, counter # load low word of counter
  // csrrs x4, counterh # load high word of counter
  // bne x3, x4, read # check if high word reads match, otherwise try again
  // ```

  MachineFunction &MF = *BB->getParent();
  const BasicBlock *LLVMBB = BB->getBasicBlock();

  // LoopMBB holds the hi/lo/hi read sequence and branches to itself on a
  // mismatched high-word read.
  MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVMBB);
  MF.insert(It, LoopMBB);

  MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVMBB);
  MF.insert(It, DoneMBB);

  // Transfer the remainder of BB and its successor edges to DoneMBB.
  DoneMBB->splice(DoneMBB->begin(), BB,
                  std::next(MachineBasicBlock::iterator(MI)), BB->end());

  BB->addSuccessor(LoopMBB);

  // ReadAgainReg receives the second read of the high half for comparison.
  Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
  Register LoReg = MI.getOperand(0).getReg();
  Register HiReg = MI.getOperand(1).getReg();
  int64_t LoCounter = MI.getOperand(2).getImm();
  int64_t HiCounter = MI.getOperand(3).getImm();
  DebugLoc DL = MI.getDebugLoc();

  // First high-half read, then the low half, then a second high-half read.
  BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg)
      .addImm(HiCounter)
      .addReg(RISCV::X0);
  BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg)
      .addImm(LoCounter)
      .addReg(RISCV::X0);
  BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg)
      .addImm(HiCounter)
      .addReg(RISCV::X0);

  // Retry the whole sequence if the two high-half reads disagree (the counter
  // wrapped across the low-half read).
  BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
      .addReg(HiReg)
      .addReg(ReadAgainReg)
      .addMBB(LoopMBB);

  LoopMBB->addSuccessor(LoopMBB);
  LoopMBB->addSuccessor(DoneMBB);

  MI.eraseFromParent();

  return DoneMBB;
}
18402
                                             const RISCVSubtarget &Subtarget) {
  assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction");

  // Split an FPR64 value into its low and high 32-bit GPR halves by spilling
  // it to a shared stack slot and reloading each half with LW.
  MachineFunction &MF = *BB->getParent();
  DebugLoc DL = MI.getDebugLoc();
  Register LoReg = MI.getOperand(0).getReg();
  Register HiReg = MI.getOperand(1).getReg();
  Register SrcReg = MI.getOperand(2).getReg();

  const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass;
  // The frame index is reused across all f64 moves in this function.
  int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);

  TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC,
                          RI, Register());
  MachineMemOperand *MMOLo =
  // Low half at offset 0, high half at offset 4 (little-endian layout).
  BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
      .addFrameIndex(FI)
      .addImm(0)
      .addMemOperand(MMOLo);
  BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
      .addFrameIndex(FI)
      .addImm(4)
      .addMemOperand(MMOHi);
  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}
18437
                                                 const RISCVSubtarget &Subtarget) {
  assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo &&
         "Unexpected instruction");

  // Build an FPR64 value from two 32-bit GPR halves by storing both halves to
  // a shared stack slot and reloading the combined f64.
  MachineFunction &MF = *BB->getParent();
  DebugLoc DL = MI.getDebugLoc();
  Register DstReg = MI.getOperand(0).getReg();
  Register LoReg = MI.getOperand(1).getReg();
  Register HiReg = MI.getOperand(2).getReg();

  const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass;
  // The frame index is reused across all f64 moves in this function.
  int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);

  MachineMemOperand *MMOLo =
  // Low half at offset 0, high half at offset 4 (little-endian layout).
  BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
      .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
      .addFrameIndex(FI)
      .addImm(0)
      .addMemOperand(MMOLo);
  BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
      .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
      .addFrameIndex(FI)
      .addImm(4)
      .addMemOperand(MMOHi);
  TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI, Register());
  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}
18474
  // Returns true if MI is one of the Select_*_Using_CC_* pseudo instructions
  // that are expanded into branch-and-PHI control flow by emitSelectPseudo.
  switch (MI.getOpcode()) {
  default:
    return false;
  case RISCV::Select_GPR_Using_CC_GPR:
  case RISCV::Select_GPR_Using_CC_Imm:
  case RISCV::Select_FPR16_Using_CC_GPR:
  case RISCV::Select_FPR16INX_Using_CC_GPR:
  case RISCV::Select_FPR32_Using_CC_GPR:
  case RISCV::Select_FPR32INX_Using_CC_GPR:
  case RISCV::Select_FPR64_Using_CC_GPR:
  case RISCV::Select_FPR64INX_Using_CC_GPR:
  case RISCV::Select_FPR64IN32X_Using_CC_GPR:
    return true;
  }
}
18491
                                     unsigned RelOpcode, unsigned EqOpcode,
                                     const RISCVSubtarget &Subtarget) {
  // Lower a quiet FP compare pseudo: run the relational compare with FFLAGS
  // saved and restored around it, then issue the equality compare (discarded
  // into X0) purely for its exception behavior on signaling NaNs.
  DebugLoc DL = MI.getDebugLoc();
  Register DstReg = MI.getOperand(0).getReg();
  Register Src1Reg = MI.getOperand(1).getReg();
  Register Src2Reg = MI.getOperand(2).getReg();
  Register SavedFFlags = MRI.createVirtualRegister(&RISCV::GPRRegClass);

  // Save the current FFLAGS.
  BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFlags);

  auto MIB = BuildMI(*BB, MI, DL, TII.get(RelOpcode), DstReg)
                 .addReg(Src1Reg)
                 .addReg(Src2Reg);

  // Restore the FFLAGS.
  BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
      .addReg(SavedFFlags, RegState::Kill);

  // Issue a dummy FEQ opcode to raise exception for signaling NaNs.
  auto MIB2 = BuildMI(*BB, MI, DL, TII.get(EqOpcode), RISCV::X0)
                  .addReg(Src1Reg, getKillRegState(MI.getOperand(1).isKill()))
                  .addReg(Src2Reg, getKillRegState(MI.getOperand(2).isKill()));

  // Erase the pseudoinstruction.
  MI.eraseFromParent();
  return BB;
}
18527
// Lower two nested Select_FPRX_ pseudos (the inner select feeding the outer
// one's false operand) in a single step, producing one shared diamond of
// control flow instead of two stacked triangles.
static MachineBasicBlock *
                          MachineBasicBlock *ThisMBB,
                          const RISCVSubtarget &Subtarget) {
  // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5)
  // Without this, custom-inserter would have generated:
  //
  //   A
  //   | \
  //   |  B
  //   | /
  //   C
  //   | \
  //   |  D
  //   | /
  //   E
  //
  // A: X = ...; Y = ...
  // B: empty
  // C: Z = PHI [X, A], [Y, B]
  // D: empty
  // E: PHI [X, C], [Z, D]
  //
  // If we lower both Select_FPRX_ in a single step, we can instead generate:
  //
  //   A
  //   | \
  //   |  C
  //   | /|
  //   |/ |
  //   |  |
  //   |  D
  //   | /
  //   E
  //
  // A: X = ...; Y = ...
  // D: empty
  // E: PHI [X, A], [X, C], [Y, D]

  const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
  const DebugLoc &DL = First.getDebugLoc();
  const BasicBlock *LLVM_BB = ThisMBB->getBasicBlock();
  MachineFunction *F = ThisMBB->getParent();
  MachineBasicBlock *FirstMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *SecondMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineFunction::iterator It = ++ThisMBB->getIterator();
  F->insert(It, FirstMBB);
  F->insert(It, SecondMBB);
  F->insert(It, SinkMBB);

  // Transfer the remainder of ThisMBB and its successor edges to SinkMBB.
  SinkMBB->splice(SinkMBB->begin(), ThisMBB,
                  ThisMBB->end());
  SinkMBB->transferSuccessorsAndUpdatePHIs(ThisMBB);

  // Fallthrough block for ThisMBB.
  ThisMBB->addSuccessor(FirstMBB);
  // Fallthrough block for FirstMBB.
  FirstMBB->addSuccessor(SecondMBB);
  ThisMBB->addSuccessor(SinkMBB);
  FirstMBB->addSuccessor(SinkMBB);
  // This is fallthrough.
  SecondMBB->addSuccessor(SinkMBB);

  // Branch for the inner (First) select: taken means its condition holds.
  auto FirstCC = static_cast<RISCVCC::CondCode>(First.getOperand(3).getImm());
  Register FLHS = First.getOperand(1).getReg();
  Register FRHS = First.getOperand(2).getReg();
  // Insert appropriate branch.
  BuildMI(FirstMBB, DL, TII.getBrCond(FirstCC))
      .addReg(FLHS)
      .addReg(FRHS)
      .addMBB(SinkMBB);

  Register SLHS = Second.getOperand(1).getReg();
  Register SRHS = Second.getOperand(2).getReg();
  Register Op1Reg4 = First.getOperand(4).getReg();
  Register Op1Reg5 = First.getOperand(5).getReg();

  // Branch for the outer (Second) select, emitted in the original block.
  auto SecondCC = static_cast<RISCVCC::CondCode>(Second.getOperand(3).getImm());
  // Insert appropriate branch.
  BuildMI(ThisMBB, DL, TII.getBrCond(SecondCC))
      .addReg(SLHS)
      .addReg(SRHS)
      .addMBB(SinkMBB);

  Register DestReg = Second.getOperand(0).getReg();
  Register Op2Reg4 = Second.getOperand(4).getReg();
  // Single PHI merging all three incoming paths (see diagram above).
  BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII.get(RISCV::PHI), DestReg)
      .addReg(Op2Reg4)
      .addMBB(ThisMBB)
      .addReg(Op1Reg4)
      .addMBB(FirstMBB)
      .addReg(Op1Reg5)
      .addMBB(SecondMBB);

  // Now remove the Select_FPRX_s.
  First.eraseFromParent();
  Second.eraseFromParent();
  return SinkMBB;
}
18630
                                           const RISCVSubtarget &Subtarget) {
  // To "insert" Select_* instructions, we actually have to insert the triangle
  // control-flow pattern. The incoming instructions know the destination vreg
  // to set, the condition code register to branch on, the true/false values to
  // select between, and the condcode to use to select the appropriate branch.
  //
  // We produce the following control flow:
  //     HeadMBB
  //     |  \
  //     |  IfFalseMBB
  //     | /
  //    TailMBB
  //
  // When we find a sequence of selects we attempt to optimize their emission
  // by sharing the control flow. Currently we only handle cases where we have
  // multiple selects with the exact same condition (same LHS, RHS and CC).
  // The selects may be interleaved with other instructions if the other
  // instructions meet some requirements we deem safe:
  // - They are not pseudo instructions.
  // - They are debug instructions. Otherwise,
  // - They do not have side-effects, do not access memory and their inputs do
  //   not depend on the results of the select pseudo-instructions.
  // The TrueV/FalseV operands of the selects cannot depend on the result of
  // previous selects in the sequence.
  // These conditions could be further relaxed. See the X86 target for a
  // related approach and more information.
  //
  // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
  // is checked here and handled by a separate function -
  // EmitLoweredCascadedSelect.

  // Detect the cascaded-select pattern: the next (non-debug) instruction is a
  // select of the same opcode whose false operand is this select's result.
  auto Next = next_nodbg(MI.getIterator(), BB->instr_end());
  if ((MI.getOpcode() != RISCV::Select_GPR_Using_CC_GPR &&
       MI.getOpcode() != RISCV::Select_GPR_Using_CC_Imm) &&
      Next != BB->end() && Next->getOpcode() == MI.getOpcode() &&
      Next->getOperand(5).getReg() == MI.getOperand(0).getReg() &&
      Next->getOperand(5).isKill())
    return EmitLoweredCascadedSelect(MI, *Next, BB, Subtarget);

  Register LHS = MI.getOperand(1).getReg();
  Register RHS;
  if (MI.getOperand(2).isReg())
    RHS = MI.getOperand(2).getReg();
  auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());

  SmallVector<MachineInstr *, 4> SelectDebugValues;
  SmallSet<Register, 4> SelectDests;
  SelectDests.insert(MI.getOperand(0).getReg());

  // Scan forward collecting the run of selects (and safe interleaved
  // instructions) that can share this branch. LastSelectPseudo marks the end
  // of the shareable sequence.
  MachineInstr *LastSelectPseudo = &MI;
  for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
       SequenceMBBI != E; ++SequenceMBBI) {
    if (SequenceMBBI->isDebugInstr())
      continue;
    if (isSelectPseudo(*SequenceMBBI)) {
      // Only selects with the identical condition whose operands don't feed
      // from earlier select results can join the sequence.
      if (SequenceMBBI->getOperand(1).getReg() != LHS ||
          !SequenceMBBI->getOperand(2).isReg() ||
          SequenceMBBI->getOperand(2).getReg() != RHS ||
          SequenceMBBI->getOperand(3).getImm() != CC ||
          SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
          SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
        break;
      LastSelectPseudo = &*SequenceMBBI;
      SequenceMBBI->collectDebugValues(SelectDebugValues);
      SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
      continue;
    }
    if (SequenceMBBI->hasUnmodeledSideEffects() ||
        SequenceMBBI->mayLoadOrStore() ||
        SequenceMBBI->usesCustomInsertionHook())
      break;
    if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
          return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
        }))
      break;
  }

  const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  DebugLoc DL = MI.getDebugLoc();

  MachineBasicBlock *HeadMBB = BB;
  MachineFunction *F = BB->getParent();
  MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);

  F->insert(I, IfFalseMBB);
  F->insert(I, TailMBB);

  // Set the call frame size on entry to the new basic blocks.
  unsigned CallFrameSize = TII.getCallFrameSizeAt(*LastSelectPseudo);
  IfFalseMBB->setCallFrameSize(CallFrameSize);
  TailMBB->setCallFrameSize(CallFrameSize);

  // Transfer debug instructions associated with the selects to TailMBB.
  for (MachineInstr *DebugInstr : SelectDebugValues) {
    TailMBB->push_back(DebugInstr->removeFromParent());
  }

  // Move all instructions after the sequence to TailMBB.
  TailMBB->splice(TailMBB->end(), HeadMBB,
                  std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
  // Update machine-CFG edges by transferring all successors of the current
  // block to the new block which will contain the Phi nodes for the selects.
  TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
  // Set the successors for HeadMBB.
  HeadMBB->addSuccessor(IfFalseMBB);
  HeadMBB->addSuccessor(TailMBB);

  // Insert appropriate branch.
  if (MI.getOperand(2).isImm())
    BuildMI(HeadMBB, DL, TII.getBrCond(CC, MI.getOperand(2).isImm()))
        .addReg(LHS)
        .addImm(MI.getOperand(2).getImm())
        .addMBB(TailMBB);
  else
    BuildMI(HeadMBB, DL, TII.getBrCond(CC))
        .addReg(LHS)
        .addReg(RHS)
        .addMBB(TailMBB);

  // IfFalseMBB just falls through to TailMBB.
  IfFalseMBB->addSuccessor(TailMBB);

  // Create PHIs for all of the select pseudo-instructions.
  auto SelectMBBI = MI.getIterator();
  auto SelectEnd = std::next(LastSelectPseudo->getIterator());
  auto InsertionPoint = TailMBB->begin();
  while (SelectMBBI != SelectEnd) {
    auto Next = std::next(SelectMBBI);
    if (isSelectPseudo(*SelectMBBI)) {
      // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
      BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
              TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg())
          .addReg(SelectMBBI->getOperand(4).getReg())
          .addMBB(HeadMBB)
          .addReg(SelectMBBI->getOperand(5).getReg())
          .addMBB(IfFalseMBB);
      SelectMBBI->eraseFromParent();
    }
    SelectMBBI = Next;
  }

  // New PHIs were introduced after PHI elimination; clear the property flag.
  F->getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
  return TailMBB;
}
18780
// Helper to find Masked Pseudo instruction from MC instruction, LMUL and SEW.
static const RISCV::RISCVMaskedPseudoInfo *
lookupMaskedIntrinsic(uint16_t MCOpcode, RISCVII::VLMUL LMul, unsigned SEW) {
  // Map the MC opcode back to the unmasked pseudo for this LMUL/SEW pair...
      RISCVVInversePseudosTable::getBaseInfo(MCOpcode, LMul, SEW);
  assert(Inverse && "Unexpected LMUL and SEW pair for instruction");
  // ...then look up the corresponding masked pseudo.
      RISCV::lookupMaskedIntrinsicByUnmasked(Inverse->Pseudo);
  assert(Masked && "Could not find masked instruction for LMUL and SEW pair");
  return Masked;
}
18792
                                                    unsigned CVTXOpc) {
  // Lower PseudoVFROUND_NOEXCEPT_V_*_MASK: round each element to an integral
  // FP value via a vector FP->int->FP conversion pair, with FFLAGS saved and
  // restored around the sequence so no exception flags are left behind.
  DebugLoc DL = MI.getDebugLoc();

  Register SavedFFLAGS = MRI.createVirtualRegister(&RISCV::GPRRegClass);

  // Save the old value of FFLAGS.
  BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFLAGS);

  assert(MI.getNumOperands() == 7);

  // Emit a VFCVT_X_F
  const TargetRegisterInfo *TRI =
  const TargetRegisterClass *RC = MI.getRegClassConstraint(0, &TII, TRI);
  Register Tmp = MRI.createVirtualRegister(RC);
  BuildMI(*BB, MI, DL, TII.get(CVTXOpc), Tmp)
      .add(MI.getOperand(1))
      .add(MI.getOperand(2))
      .add(MI.getOperand(3))
      .add(MachineOperand::CreateImm(7)) // frm = DYN
      .add(MI.getOperand(4))
      .add(MI.getOperand(5))
      .add(MI.getOperand(6))
      .add(MachineOperand::CreateReg(RISCV::FRM,
                                     /*IsDef*/ false,
                                     /*IsImp*/ true));

  // Emit a VFCVT_F_X
  RISCVII::VLMUL LMul = RISCVII::getLMul(MI.getDesc().TSFlags);
  unsigned Log2SEW = MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm();
  // There is no E8 variant for VFCVT_F_X.
  assert(Log2SEW >= 4);
  // Find the masked VFCVT_F_X pseudo matching this instruction's LMUL/SEW.
  unsigned CVTFOpc =
      lookupMaskedIntrinsic(RISCV::VFCVT_F_X_V, LMul, 1 << Log2SEW)
          ->MaskedPseudo;

  BuildMI(*BB, MI, DL, TII.get(CVTFOpc))
      .add(MI.getOperand(0))
      .add(MI.getOperand(1))
      .addReg(Tmp)
      .add(MI.getOperand(3))
      .add(MachineOperand::CreateImm(7)) // frm = DYN
      .add(MI.getOperand(4))
      .add(MI.getOperand(5))
      .add(MI.getOperand(6))
      .add(MachineOperand::CreateReg(RISCV::FRM,
                                     /*IsDef*/ false,
                                     /*IsImp*/ true));

  // Restore FFLAGS.
  BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
      .addReg(SavedFFLAGS, RegState::Kill);

  // Erase the pseudoinstruction.
  MI.eraseFromParent();
  return BB;
}
18855
                                      const RISCVSubtarget &Subtarget) {
  // Lower PseudoFROUND_*: round a scalar FP value to an integral FP value.
  // If |src| compares less-than the max threshold operand, convert to integer
  // and back with the requested rounding mode and restore the sign bit;
  // otherwise the source is passed through unchanged.
  unsigned CmpOpc, F2IOpc, I2FOpc, FSGNJOpc, FSGNJXOpc;
  const TargetRegisterClass *RC;
  // Select the per-type opcodes (H/S/D, optionally the in-GPR "INX" forms).
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("Unexpected opcode");
  case RISCV::PseudoFROUND_H:
    CmpOpc = RISCV::FLT_H;
    F2IOpc = RISCV::FCVT_W_H;
    I2FOpc = RISCV::FCVT_H_W;
    FSGNJOpc = RISCV::FSGNJ_H;
    FSGNJXOpc = RISCV::FSGNJX_H;
    RC = &RISCV::FPR16RegClass;
    break;
  case RISCV::PseudoFROUND_H_INX:
    CmpOpc = RISCV::FLT_H_INX;
    F2IOpc = RISCV::FCVT_W_H_INX;
    I2FOpc = RISCV::FCVT_H_W_INX;
    FSGNJOpc = RISCV::FSGNJ_H_INX;
    FSGNJXOpc = RISCV::FSGNJX_H_INX;
    RC = &RISCV::GPRF16RegClass;
    break;
  case RISCV::PseudoFROUND_S:
    CmpOpc = RISCV::FLT_S;
    F2IOpc = RISCV::FCVT_W_S;
    I2FOpc = RISCV::FCVT_S_W;
    FSGNJOpc = RISCV::FSGNJ_S;
    FSGNJXOpc = RISCV::FSGNJX_S;
    RC = &RISCV::FPR32RegClass;
    break;
  case RISCV::PseudoFROUND_S_INX:
    CmpOpc = RISCV::FLT_S_INX;
    F2IOpc = RISCV::FCVT_W_S_INX;
    I2FOpc = RISCV::FCVT_S_W_INX;
    FSGNJOpc = RISCV::FSGNJ_S_INX;
    FSGNJXOpc = RISCV::FSGNJX_S_INX;
    RC = &RISCV::GPRF32RegClass;
    break;
  case RISCV::PseudoFROUND_D:
    assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
    CmpOpc = RISCV::FLT_D;
    F2IOpc = RISCV::FCVT_L_D;
    I2FOpc = RISCV::FCVT_D_L;
    FSGNJOpc = RISCV::FSGNJ_D;
    FSGNJXOpc = RISCV::FSGNJX_D;
    RC = &RISCV::FPR64RegClass;
    break;
  case RISCV::PseudoFROUND_D_INX:
    assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
    CmpOpc = RISCV::FLT_D_INX;
    F2IOpc = RISCV::FCVT_L_D_INX;
    I2FOpc = RISCV::FCVT_D_L_INX;
    FSGNJOpc = RISCV::FSGNJ_D_INX;
    FSGNJXOpc = RISCV::FSGNJX_D_INX;
    RC = &RISCV::GPRRegClass;
    break;
  }

  const BasicBlock *BB = MBB->getBasicBlock();
  DebugLoc DL = MI.getDebugLoc();

  MachineBasicBlock *CvtMBB = F->CreateMachineBasicBlock(BB);
  MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(BB);

  F->insert(I, CvtMBB);
  F->insert(I, DoneMBB);
  // Move all instructions after the sequence to DoneMBB.
  DoneMBB->splice(DoneMBB->end(), MBB, MachineBasicBlock::iterator(MI),
                  MBB->end());
  // Update machine-CFG edges by transferring all successors of the current
  // block to the new block which will contain the Phi nodes for the selects.
  // Set the successors for MBB.
  MBB->addSuccessor(CvtMBB);
  MBB->addSuccessor(DoneMBB);

  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  Register MaxReg = MI.getOperand(2).getReg();
  int64_t FRM = MI.getOperand(3).getImm();

  const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();

  // Compute |src| with fsgnjx (sign-injects src's own sign XORed with itself).
  Register FabsReg = MRI.createVirtualRegister(RC);
  BuildMI(MBB, DL, TII.get(FSGNJXOpc), FabsReg).addReg(SrcReg).addReg(SrcReg);

  // Compare the FP value to the max value.
  Register CmpReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
  auto MIB =
      BuildMI(MBB, DL, TII.get(CmpOpc), CmpReg).addReg(FabsReg).addReg(MaxReg);

  // Insert branch.
  // If |src| is not less than the threshold (CmpReg == 0), skip the
  // conversion and pass the source through.
  BuildMI(MBB, DL, TII.get(RISCV::BEQ))
      .addReg(CmpReg)
      .addReg(RISCV::X0)
      .addMBB(DoneMBB);

  CvtMBB->addSuccessor(DoneMBB);

  // Convert to integer.
  Register F2IReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
  MIB = BuildMI(CvtMBB, DL, TII.get(F2IOpc), F2IReg).addReg(SrcReg).addImm(FRM);

  // Convert back to FP.
  Register I2FReg = MRI.createVirtualRegister(RC);
  MIB = BuildMI(CvtMBB, DL, TII.get(I2FOpc), I2FReg).addReg(F2IReg).addImm(FRM);

  // Restore the sign bit.
  Register CvtReg = MRI.createVirtualRegister(RC);
  BuildMI(CvtMBB, DL, TII.get(FSGNJOpc), CvtReg).addReg(I2FReg).addReg(SrcReg);

  // Merge the results.
  BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(RISCV::PHI), DstReg)
      .addReg(SrcReg)
      .addMBB(MBB)
      .addReg(CvtReg)
      .addMBB(CvtMBB);

  MI.eraseFromParent();
  return DoneMBB;
}
18987
                                                 MachineBasicBlock *BB) const {
  // Dispatch each custom-inserted pseudo to its dedicated expansion helper.
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("Unexpected instr type to insert");
  case RISCV::ReadCounterWide:
    assert(!Subtarget.is64Bit() &&
           "ReadCounterWide is only to be used on riscv32");
    return emitReadCounterWidePseudo(MI, BB);
  case RISCV::Select_GPR_Using_CC_GPR:
  case RISCV::Select_GPR_Using_CC_Imm:
  case RISCV::Select_FPR16_Using_CC_GPR:
  case RISCV::Select_FPR16INX_Using_CC_GPR:
  case RISCV::Select_FPR32_Using_CC_GPR:
  case RISCV::Select_FPR32INX_Using_CC_GPR:
  case RISCV::Select_FPR64_Using_CC_GPR:
  case RISCV::Select_FPR64INX_Using_CC_GPR:
  case RISCV::Select_FPR64IN32X_Using_CC_GPR:
    return emitSelectPseudo(MI, BB, Subtarget);
  case RISCV::BuildPairF64Pseudo:
    return emitBuildPairF64Pseudo(MI, BB, Subtarget);
  case RISCV::SplitF64Pseudo:
    return emitSplitF64Pseudo(MI, BB, Subtarget);
  // Quiet FP compares: each pseudo expands to the relational compare plus a
  // dummy FEQ, with FFLAGS preserved (see emitQuietFCMP).
  case RISCV::PseudoQuietFLE_H:
    return emitQuietFCMP(MI, BB, RISCV::FLE_H, RISCV::FEQ_H, Subtarget);
  case RISCV::PseudoQuietFLE_H_INX:
    return emitQuietFCMP(MI, BB, RISCV::FLE_H_INX, RISCV::FEQ_H_INX, Subtarget);
  case RISCV::PseudoQuietFLT_H:
    return emitQuietFCMP(MI, BB, RISCV::FLT_H, RISCV::FEQ_H, Subtarget);
  case RISCV::PseudoQuietFLT_H_INX:
    return emitQuietFCMP(MI, BB, RISCV::FLT_H_INX, RISCV::FEQ_H_INX, Subtarget);
  case RISCV::PseudoQuietFLE_S:
    return emitQuietFCMP(MI, BB, RISCV::FLE_S, RISCV::FEQ_S, Subtarget);
  case RISCV::PseudoQuietFLE_S_INX:
    return emitQuietFCMP(MI, BB, RISCV::FLE_S_INX, RISCV::FEQ_S_INX, Subtarget);
  case RISCV::PseudoQuietFLT_S:
    return emitQuietFCMP(MI, BB, RISCV::FLT_S, RISCV::FEQ_S, Subtarget);
  case RISCV::PseudoQuietFLT_S_INX:
    return emitQuietFCMP(MI, BB, RISCV::FLT_S_INX, RISCV::FEQ_S_INX, Subtarget);
  case RISCV::PseudoQuietFLE_D:
    return emitQuietFCMP(MI, BB, RISCV::FLE_D, RISCV::FEQ_D, Subtarget);
  case RISCV::PseudoQuietFLE_D_INX:
    return emitQuietFCMP(MI, BB, RISCV::FLE_D_INX, RISCV::FEQ_D_INX, Subtarget);
  case RISCV::PseudoQuietFLE_D_IN32X:
    return emitQuietFCMP(MI, BB, RISCV::FLE_D_IN32X, RISCV::FEQ_D_IN32X,
                         Subtarget);
  case RISCV::PseudoQuietFLT_D:
    return emitQuietFCMP(MI, BB, RISCV::FLT_D, RISCV::FEQ_D, Subtarget);
  case RISCV::PseudoQuietFLT_D_INX:
    return emitQuietFCMP(MI, BB, RISCV::FLT_D_INX, RISCV::FEQ_D_INX, Subtarget);
  case RISCV::PseudoQuietFLT_D_IN32X:
    return emitQuietFCMP(MI, BB, RISCV::FLT_D_IN32X, RISCV::FEQ_D_IN32X,
                         Subtarget);

  // Vector no-exception rounding: pick the VFCVT_X_F variant matching the
  // pseudo's LMUL (see emitVFROUND_NOEXCEPT_MASK).
  case RISCV::PseudoVFROUND_NOEXCEPT_V_M1_MASK:
    return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M1_MASK);
  case RISCV::PseudoVFROUND_NOEXCEPT_V_M2_MASK:
    return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M2_MASK);
  case RISCV::PseudoVFROUND_NOEXCEPT_V_M4_MASK:
    return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M4_MASK);
  case RISCV::PseudoVFROUND_NOEXCEPT_V_M8_MASK:
    return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M8_MASK);
  case RISCV::PseudoVFROUND_NOEXCEPT_V_MF2_MASK:
    return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF2_MASK);
  case RISCV::PseudoVFROUND_NOEXCEPT_V_MF4_MASK:
    return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF4_MASK);
  case RISCV::PseudoFROUND_H:
  case RISCV::PseudoFROUND_H_INX:
  case RISCV::PseudoFROUND_S:
  case RISCV::PseudoFROUND_S_INX:
  case RISCV::PseudoFROUND_D:
  case RISCV::PseudoFROUND_D_INX:
  case RISCV::PseudoFROUND_D_IN32X:
    return emitFROUND(MI, BB, Subtarget);
  case TargetOpcode::STATEPOINT:
    // STATEPOINT is a pseudo instruction which has no implicit defs/uses
    // while jal call instruction (where statepoint will be lowered at the end)
    // has implicit def. This def is early-clobber as it will be set at
    // the moment of the call and earlier than any use is read.
    // Add this implicit dead def here as a workaround.
    MI.addOperand(*MI.getMF(),
                      RISCV::X1, /*isDef*/ true,
                      /*isImp*/ true, /*isKill*/ false, /*isDead*/ true,
                      /*isUndef*/ false, /*isEarlyClobber*/ true));
    [[fallthrough]];
  case TargetOpcode::STACKMAP:
  case TargetOpcode::PATCHPOINT:
    if (!Subtarget.is64Bit())
      report_fatal_error("STACKMAP, PATCHPOINT and STATEPOINT are only "
                         "supported on 64-bit targets");
    return emitPatchPoint(MI, BB);
  }
}
19083
                                                        SDNode *Node) const {
  // Add FRM dependency to any instructions with dynamic rounding mode.
  // Scalar FP instructions expose the rounding-mode operand by name ("frm")...
  int Idx = RISCV::getNamedOperandIdx(MI.getOpcode(), RISCV::OpName::frm);
  if (Idx < 0) {
    // Vector pseudos have FRM index indicated by TSFlags.
    Idx = RISCVII::getFRMOpNum(MI.getDesc());
    if (Idx < 0)
      return;
  }
  // Only dynamic rounding mode actually reads the FRM CSR.
  if (MI.getOperand(Idx).getImm() != RISCVFPRndMode::DYN)
    return;
  // If the instruction already reads FRM, don't add another read.
  if (MI.readsRegister(RISCV::FRM, /*TRI=*/nullptr))
    return;
  // Attach an implicit use of FRM so scheduling/CSR writes are ordered
  // correctly against this instruction.
  MI.addOperand(
      MachineOperand::CreateReg(RISCV::FRM, /*isDef*/ false, /*isImp*/ true));
}
19102
// Run the calling-convention assignment function Fn over every incoming value
// in Ins, populating CCInfo with the resulting locations. When IsRet is true
// the values being analyzed are return values rather than formal arguments.
// Aborts via llvm_unreachable if Fn rejects any value's type.
void RISCVTargetLowering::analyzeInputArgs(
    MachineFunction &MF, CCState &CCInfo,
    const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
    RISCVCCAssignFn Fn) const {
  unsigned NumArgs = Ins.size();

  for (unsigned i = 0; i != NumArgs; ++i) {
    MVT ArgVT = Ins[i].VT;
    ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;

    // Recover the original IR type where available: the function's return
    // type when analyzing returns, otherwise the matching parameter type.
    Type *ArgTy = nullptr;
    if (IsRet)
      ArgTy = FType->getReturnType();
    else if (Ins[i].isOrigArg())
      ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());

    if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo,
           /*IsFixed=*/true, IsRet, ArgTy)) {
      LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
                        << ArgVT << '\n');
      llvm_unreachable(nullptr);
    }
  }
}
19128
19129void RISCVTargetLowering::analyzeOutputArgs(
19130 MachineFunction &MF, CCState &CCInfo,
19131 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
19132 CallLoweringInfo *CLI, RISCVCCAssignFn Fn) const {
19133 unsigned NumArgs = Outs.size();
19134
19135 for (unsigned i = 0; i != NumArgs; i++) {
19136 MVT ArgVT = Outs[i].VT;
19137 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
19138 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
19139
19140 if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo,
19141 Outs[i].IsFixed, IsRet, OrigTy)) {
19142 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
19143 << ArgVT << "\n");
19144 llvm_unreachable(nullptr);
19145 }
19146 }
19147}
19148
// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
// values.
                                   const CCValAssign &VA, const SDLoc &DL,
                                   const RISCVSubtarget &Subtarget) {
  if (VA.needsCustom()) {
    // f16/bf16 passed in an integer register: move bits back into an FPR.
    if (VA.getLocVT().isInteger() &&
        (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
      return DAG.getNode(RISCVISD::FMV_H_X, DL, VA.getValVT(), Val);
    // f32 passed in the low half of an i64 GPR on RV64.
    if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
      return DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
      return convertFromScalableVector(VA.getValVT(), Val, DAG, Subtarget);
    llvm_unreachable("Unexpected Custom handling.");
  }

  switch (VA.getLocInfo()) {
  default:
    llvm_unreachable("Unexpected CCValAssign::LocInfo");
  case CCValAssign::Full:
    break;
  case CCValAssign::BCvt:
    // Same size, different type: reinterpret the bits.
    Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
    break;
  }
  return Val;
}
19176
19177 // The caller is responsible for loading the full value if the argument is
19178 // passed with CCValAssign::Indirect.
// Copy an incoming argument out of its assigned physical register into a
// fresh virtual register, then convert it from the location type to the
// value type.
// NOTE(review): the declarator line and the local declarations of MF and
// RegInfo are missing from this listing — confirm against upstream.
 19180 const CCValAssign &VA, const SDLoc &DL,
 19181 const ISD::InputArg &In,
 19182 const RISCVTargetLowering &TLI) {
 19185 EVT LocVT = VA.getLocVT();
 19186 SDValue Val;
// Live-in the physical argument register into a new virtual register of the
// matching register class and read it via CopyFromReg.
 19187 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
 19188 Register VReg = RegInfo.createVirtualRegister(RC);
 19189 RegInfo.addLiveIn(VA.getLocReg(), VReg);
 19190 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
19191
 19192 // If input is sign extended from 32 bits, note it for the SExtWRemoval pass.
 19193 if (In.isOrigArg()) {
 19194 Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
 19195 if (OrigArg->getType()->isIntegerTy()) {
 19196 unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
 19197 // An input zero extended from i31 can also be considered sign extended.
 19198 if ((BitWidth <= 32 && In.Flags.isSExt()) ||
 19199 (BitWidth < 32 && In.Flags.isZExt())) {
// NOTE(review): the declaration of RVFI (the RISCVMachineFunctionInfo
// fetched from MF) is missing from this listing.
 19201 RVFI->addSExt32Register(VReg);
 19202 }
 19203 }
 19204 }
19205
// NOTE(review): the guard before this early return (the Indirect-location
// check referred to by the header comment) is missing from this listing —
// for Indirect, the raw pointer value is returned unconverted.
 19207 return Val;
19208
 19209 return convertLocVTToValVT(DAG, Val, VA, DL, TLI.getSubtarget());
 19210 }
19211
// Convert Val from its IR-level value type (ValVT) to the type of the
// location it will be passed in (LocVT) — the inverse of
// convertLocVTToValVT above.
// NOTE(review): the first line of this helper's declarator is missing from
// this listing; from the body it takes (DAG, Val, VA, DL, Subtarget).
 19213 const CCValAssign &VA, const SDLoc &DL,
 19214 const RISCVSubtarget &Subtarget) {
 19215 EVT LocVT = VA.getLocVT();
19216
// "Custom" locations need target-specific moves rather than a plain bitcast.
 19217 if (VA.needsCustom()) {
// f16/bf16 passed in an integer register: move the bits with FMV_X_ANYEXTH.
 19218 if (LocVT.isInteger() &&
 19219 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
 19220 return DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, LocVT, Val);
// f32 passed in an i64 register on RV64: use the RV64-specific move.
 19221 if (LocVT == MVT::i64 && VA.getValVT() == MVT::f32)
 19222 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);
// Fixed-length vector passed in a scalable-vector location: insert it into
// a scalable container.
 19223 if (VA.getValVT().isFixedLengthVector() && LocVT.isScalableVector())
 19224 return convertToScalableVector(LocVT, Val, DAG, Subtarget);
 19225 llvm_unreachable("Unexpected Custom handling.");
 19226 }
19227
// Non-custom locations: Full needs no conversion; BCvt is a plain bitcast
// to the location type.
 19228 switch (VA.getLocInfo()) {
 19229 default:
 19230 llvm_unreachable("Unexpected CCValAssign::LocInfo");
 19231 case CCValAssign::Full:
 19232 break;
 19233 case CCValAssign::BCvt:
 19234 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
 19235 break;
 19236 }
 19237 return Val;
 19238 }
19239
19240 // The caller is responsible for loading the full value if the argument is
19241 // passed with CCValAssign::Indirect.
// Load an incoming argument from its stack slot: create a fixed frame object
// at the location's stack offset and emit a load from it.
// NOTE(review): the declarator line and the declarations of MF, PtrVT and
// ExtType are missing from this listing — confirm against upstream.
 19243 const CCValAssign &VA, const SDLoc &DL) {
 19245 MachineFrameInfo &MFI = MF.getFrameInfo();
 19246 EVT LocVT = VA.getLocVT();
 19247 EVT ValVT = VA.getValVT();
 19249 if (VA.getLocInfo() == CCValAssign::Indirect) {
 19250 // When the value is a scalable vector, we save the pointer which points to
 19251 // the scalable vector value in the stack. The ValVT will be the pointer
 19252 // type, instead of the scalable vector type.
 19253 ValVT = LocVT;
 19254 }
// The frame object is immutable: it describes an incoming argument slot the
// callee never writes.
 19255 int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
 19256 /*IsImmutable=*/true);
 19257 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
 19258 SDValue Val;
19259
// Select the load extension kind from the location info.
// NOTE(review): the lines that assign ExtType for each case are missing
// from this listing; only the case labels remain.
 19261 switch (VA.getLocInfo()) {
 19262 default:
 19263 llvm_unreachable("Unexpected CCValAssign::LocInfo");
 19264 case CCValAssign::Full:
 19266 case CCValAssign::BCvt:
 19267 break;
 19268 }
 19269 Val = DAG.getExtLoad(
 19270 ExtType, DL, LocVT, Chain, FIN,
 19272 return Val;
 19273 }
19274
// Reassemble an incoming f64 argument that was split across two i32
// locations under the RV32 soft-float f64 passing scheme: the low half
// always arrives in a GPR (VA); the high half (HiVA) arrives either in a
// second GPR or on the stack.  The halves are recombined with BuildPairF64.
// NOTE(review): the declarator line and the declarations of MF and RegInfo
// are missing from this listing — confirm against upstream.
 19276 const CCValAssign &VA,
 19277 const CCValAssign &HiVA,
 19278 const SDLoc &DL) {
 19279 assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
 19280 "Unexpected VA");
 19282 MachineFrameInfo &MFI = MF.getFrameInfo();
19284
 19285 assert(VA.isRegLoc() && "Expected register VA assignment");
19286
// Low 32 bits: live-in the assigned GPR into a fresh virtual register.
 19287 Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
 19288 RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
 19289 SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
 19290 SDValue Hi;
 19291 if (HiVA.isMemLoc()) {
 19292 // Second half of f64 is passed on the stack.
 19293 int FI = MFI.CreateFixedObject(4, HiVA.getLocMemOffset(),
 19294 /*IsImmutable=*/true);
 19295 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
 19296 Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
 19298 } else {
 19299 // Second half of f64 is passed in another GPR.
 19300 Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
 19301 RegInfo.addLiveIn(HiVA.getLocReg(), HiVReg);
 19302 Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
 19303 }
 19304 return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
 19305 }
19306
19307 // Transform physical registers into virtual registers.
// Lower the incoming formal arguments of the current function: validate the
// calling convention, assign a location to each argument, copy/load each
// value into InVals, and materialize the varargs save area when needed.
// NOTE(review): several lines are missing from this listing (the function
// declarator, the MF/ArgLocs declarations, some report_fatal_error heads,
// and a handful of interior lines) — confirm against upstream before
// editing any code here.
 19309 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
 19310 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
 19311 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
19312
19314
// Only the conventions listed below are supported; anything else is a
// hard error.
 19315 switch (CallConv) {
 19316 default:
 19317 report_fatal_error("Unsupported calling convention");
 19318 case CallingConv::C:
 19319 case CallingConv::Fast:
 19321 case CallingConv::GRAAL:
 19323 break;
 19324 case CallingConv::GHC:
 19325 if (Subtarget.hasStdExtE())
 19326 report_fatal_error("GHC calling convention is not supported on RVE!");
 19327 if (!Subtarget.hasStdExtFOrZfinx() || !Subtarget.hasStdExtDOrZdinx())
 19328 report_fatal_error("GHC calling convention requires the (Zfinx/F) and "
 19329 "(Zdinx/D) instruction set extensions");
 19330 }
19331
// Interrupt handlers may not take arguments and must use a recognized
// "interrupt" attribute value.
 19332 const Function &Func = MF.getFunction();
 19333 if (Func.hasFnAttribute("interrupt")) {
 19334 if (!Func.arg_empty())
 19336 "Functions with the interrupt attribute cannot have arguments!");
19337
 19338 StringRef Kind =
 19339 MF.getFunction().getFnAttribute("interrupt").getValueAsString();
19340
 19341 if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))
 19343 "Function interrupt attribute argument not supported!");
 19344 }
19345
 19346 EVT PtrVT = getPointerTy(DAG.getDataLayout());
 19347 MVT XLenVT = Subtarget.getXLenVT();
 19348 unsigned XLenInBytes = Subtarget.getXLen() / 8;
 19349 // Used with vargs to acumulate store chains.
 19350 std::vector<SDValue> OutChains;
19351
 19352 // Assign locations to all of the incoming arguments.
 19354 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
19355
// GHC uses its own assignment function; everything else goes through
// analyzeInputArgs with the standard (or FastCC) assigner.
 19356 if (CallConv == CallingConv::GHC)
 19358 else
 19359 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false,
 19361 : CC_RISCV);
19362
// Walk the assigned locations and produce an SDValue per IR argument part.
// i indexes ArgLocs, InsIdx indexes Ins; they diverge when one f64 consumes
// two i32 locations below.
 19363 for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) {
 19364 CCValAssign &VA = ArgLocs[i];
 19365 SDValue ArgValue;
 19366 // Passing f64 on RV32D with a soft float ABI must be handled as a special
 19367 // case.
 19368 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
 19369 assert(VA.needsCustom());
 19370 ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, ArgLocs[++i], DL);
 19371 } else if (VA.isRegLoc())
 19372 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[InsIdx], *this);
 19373 else
 19374 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
19375
 19376 if (VA.getLocInfo() == CCValAssign::Indirect) {
 19377 // If the original argument was split and passed by reference (e.g. i128
 19378 // on RV32), we need to load all parts of it here (using the same
 19379 // address). Vectors may be partly split to registers and partly to the
 19380 // stack, in which case the base address is partly offset and subsequent
 19381 // stores are relative to that.
 19382 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
 19384 unsigned ArgIndex = Ins[InsIdx].OrigArgIndex;
 19385 unsigned ArgPartOffset = Ins[InsIdx].PartOffset;
 19386 assert(VA.getValVT().isVector() || ArgPartOffset == 0);
// Load every remaining part of the same original argument from offsets
// relative to the base pointer; scalable-vector parts scale the offset
// by VSCALE.
 19387 while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) {
 19388 CCValAssign &PartVA = ArgLocs[i + 1];
 19389 unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset;
 19390 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
 19391 if (PartVA.getValVT().isScalableVector())
 19392 Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
 19393 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
 19394 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
 19396 ++i;
 19397 ++InsIdx;
 19398 }
 19399 continue;
 19400 }
 19401 InVals.push_back(ArgValue);
 19402 }
19403
// NOTE(review): the body of this any_of guard (presumably marking the
// function as vector-using) is missing from this listing.
 19404 if (any_of(ArgLocs,
 19405 [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
19407
 19408 if (IsVarArg) {
 19409 ArrayRef<MCPhysReg> ArgRegs = RISCV::getArgGPRs(Subtarget.getTargetABI());
 19410 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
 19411 const TargetRegisterClass *RC = &RISCV::GPRRegClass;
 19412 MachineFrameInfo &MFI = MF.getFrameInfo();
 19413 MachineRegisterInfo &RegInfo = MF.getRegInfo();
19415
 19416 // Size of the vararg save area. For now, the varargs save area is either
 19417 // zero or large enough to hold a0-a7.
 19418 int VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
 19419 int FI;
19420
 19421 // If all registers are allocated, then all varargs must be passed on the
 19422 // stack and we don't need to save any argregs.
 19423 if (VarArgsSaveSize == 0) {
 19424 int VaArgOffset = CCInfo.getStackSize();
 19425 FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
 19426 } else {
 19427 int VaArgOffset = -VarArgsSaveSize;
 19428 FI = MFI.CreateFixedObject(VarArgsSaveSize, VaArgOffset, true);
19429
 19430 // If saving an odd number of registers then create an extra stack slot to
 19431 // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
 19432 // offsets to even-numbered registered remain 2*XLEN-aligned.
 19433 if (Idx % 2) {
 19435 XLenInBytes, VaArgOffset - static_cast<int>(XLenInBytes), true);
 19436 VarArgsSaveSize += XLenInBytes;
 19437 }
19438
 19439 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
19440
 19441 // Copy the integer registers that may have been used for passing varargs
 19442 // to the vararg save area.
 19443 for (unsigned I = Idx; I < ArgRegs.size(); ++I) {
 19444 const Register Reg = RegInfo.createVirtualRegister(RC);
 19445 RegInfo.addLiveIn(ArgRegs[I], Reg);
 19446 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
 19447 SDValue Store = DAG.getStore(
 19448 Chain, DL, ArgValue, FIN,
 19449 MachinePointerInfo::getFixedStack(MF, FI, (I - Idx) * XLenInBytes));
 19450 OutChains.push_back(Store);
 19451 FIN =
 19452 DAG.getMemBasePlusOffset(FIN, TypeSize::getFixed(XLenInBytes), DL);
 19453 }
 19454 }
19455
 19456 // Record the frame index of the first variable argument
 19457 // which is a value necessary to VASTART.
 19458 RVFI->setVarArgsFrameIndex(FI);
 19459 RVFI->setVarArgsSaveSize(VarArgsSaveSize);
 19460 }
19461
 19462 // All stores are grouped in one node to allow the matching between
 19463 // the size of Ins and InVals. This only happens for vararg functions.
 19464 if (!OutChains.empty()) {
 19465 OutChains.push_back(Chain);
 19466 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
 19467 }
19468
 19469 return Chain;
 19470 }
19471
19472/// isEligibleForTailCallOptimization - Check whether the call is eligible
19473/// for tail call optimization.
19474/// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
19475bool RISCVTargetLowering::isEligibleForTailCallOptimization(
19476 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
19477 const SmallVector<CCValAssign, 16> &ArgLocs) const {
19478
19479 auto CalleeCC = CLI.CallConv;
19480 auto &Outs = CLI.Outs;
19481 auto &Caller = MF.getFunction();
19482 auto CallerCC = Caller.getCallingConv();
19483
19484 // Exception-handling functions need a special set of instructions to
19485 // indicate a return to the hardware. Tail-calling another function would
19486 // probably break this.
19487 // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
19488 // should be expanded as new function attributes are introduced.
19489 if (Caller.hasFnAttribute("interrupt"))
19490 return false;
19491
19492 // Do not tail call opt if the stack is used to pass parameters.
19493 if (CCInfo.getStackSize() != 0)
19494 return false;
19495
19496 // Do not tail call opt if any parameters need to be passed indirectly.
19497 // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
19498 // passed indirectly. So the address of the value will be passed in a
19499 // register, or if not available, then the address is put on the stack. In
19500 // order to pass indirectly, space on the stack often needs to be allocated
19501 // in order to store the value. In this case the CCInfo.getNextStackOffset()
19502 // != 0 check is not enough and we need to check if any CCValAssign ArgsLocs
19503 // are passed CCValAssign::Indirect.
19504 for (auto &VA : ArgLocs)
19505 if (VA.getLocInfo() == CCValAssign::Indirect)
19506 return false;
19507
19508 // Do not tail call opt if either caller or callee uses struct return
19509 // semantics.
19510 auto IsCallerStructRet = Caller.hasStructRetAttr();
19511 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
19512 if (IsCallerStructRet || IsCalleeStructRet)
19513 return false;
19514
19515 // The callee has to preserve all registers the caller needs to preserve.
19516 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
19517 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
19518 if (CalleeCC != CallerCC) {
19519 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
19520 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
19521 return false;
19522 }
19523
19524 // Byval parameters hand the function a pointer directly into the stack area
19525 // we want to reuse during a tail call. Working around this *is* possible
19526 // but less efficient and uglier in LowerCall.
19527 for (auto &Arg : Outs)
19528 if (Arg.Flags.isByVal())
19529 return false;
19530
19531 return true;
19532}
19533
// Return the data layout's preferred alignment for the IR type corresponding
// to VT.
// NOTE(review): the declarator line of this helper is missing from this
// listing; call sites pass (EVT, SelectionDAG&) and use the result as Align.
 19535 return DAG.getDataLayout().getPrefTypeAlign(
 19536 VT.getTypeForEVT(*DAG.getContext()));
 19537 }
19538
19539 // Lower a call to a callseq_start + CALL + callseq_end chain, and add input
19540 // and output parameter nodes.
// NOTE(review): this listing is missing the function declarator and a number
// of interior lines (the Outs/Ins/MF/ArgLocs/RegsToPass/Parts/Ops/RVLocs
// declarations, some condition heads, and several MachinePointerInfo
// operands) — confirm against upstream before editing any code here.
 19542 SmallVectorImpl<SDValue> &InVals) const {
 19543 SelectionDAG &DAG = CLI.DAG;
 19544 SDLoc &DL = CLI.DL;
 19546 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
 19548 SDValue Chain = CLI.Chain;
 19549 SDValue Callee = CLI.Callee;
 19550 bool &IsTailCall = CLI.IsTailCall;
 19551 CallingConv::ID CallConv = CLI.CallConv;
 19552 bool IsVarArg = CLI.IsVarArg;
 19553 EVT PtrVT = getPointerTy(DAG.getDataLayout());
 19554 MVT XLenVT = Subtarget.getXLenVT();
19555
19557
 19558 // Analyze the operands of the call, assigning locations to each operand.
 19560 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
19561
 19562 if (CallConv == CallingConv::GHC) {
 19563 if (Subtarget.hasStdExtE())
 19564 report_fatal_error("GHC calling convention is not supported on RVE!");
 19565 ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_GHC);
 19566 } else
 19567 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI,
 19569 : CC_RISCV);
19570
 19571 // Check if it's really possible to do a tail call.
 19572 if (IsTailCall)
 19573 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
19574
// musttail calls that cannot be tail-called are a hard error.
 19575 if (IsTailCall)
 19576 ++NumTailCalls;
 19577 else if (CLI.CB && CLI.CB->isMustTailCall())
 19578 report_fatal_error("failed to perform tail call elimination on a call "
 19579 "site marked musttail");
19580
 19581 // Get a count of how many bytes are to be pushed on the stack.
 19582 unsigned NumBytes = ArgCCInfo.getStackSize();
19583
 19584 // Create local copies for byval args
 19585 SmallVector<SDValue, 8> ByValArgs;
 19586 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
 19587 ISD::ArgFlagsTy Flags = Outs[i].Flags;
 19588 if (!Flags.isByVal())
 19589 continue;
19590
 19591 SDValue Arg = OutVals[i];
 19592 unsigned Size = Flags.getByValSize();
 19593 Align Alignment = Flags.getNonZeroByValAlign();
19594
// Each byval argument gets a fresh stack object and a memcpy from the
// caller's copy; the copy (FIPtr) is what is actually passed.
 19595 int FI =
 19596 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
 19597 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
 19598 SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT);
19599
 19600 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
 19601 /*IsVolatile=*/false,
 19602 /*AlwaysInline=*/false, /*CI*/ nullptr, IsTailCall,
 19604 ByValArgs.push_back(FIPtr);
 19605 }
19606
// Tail calls reuse the caller's frame, so no callseq markers are emitted.
 19607 if (!IsTailCall)
 19608 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
19609
 19610 // Copy argument values to their designated locations.
 19612 SmallVector<SDValue, 8> MemOpChains;
 19613 SDValue StackPtr;
// i indexes ArgLocs, j indexes ByValArgs, OutIdx indexes Outs/OutVals; i and
// OutIdx diverge when one value consumes multiple locations.
 19614 for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e;
 19615 ++i, ++OutIdx) {
 19616 CCValAssign &VA = ArgLocs[i];
 19617 SDValue ArgValue = OutVals[OutIdx];
 19618 ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags;
19619
 19620 // Handle passing f64 on RV32D with a soft float ABI as a special case.
 19621 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
 19622 assert(VA.isRegLoc() && "Expected register VA assignment");
 19623 assert(VA.needsCustom());
 19624 SDValue SplitF64 = DAG.getNode(
 19625 RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
 19626 SDValue Lo = SplitF64.getValue(0);
 19627 SDValue Hi = SplitF64.getValue(1);
19628
 19629 Register RegLo = VA.getLocReg();
 19630 RegsToPass.push_back(std::make_pair(RegLo, Lo));
19631
 19632 // Get the CCValAssign for the Hi part.
 19633 CCValAssign &HiVA = ArgLocs[++i];
19634
 19635 if (HiVA.isMemLoc()) {
 19636 // Second half of f64 is passed on the stack.
 19637 if (!StackPtr.getNode())
 19638 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
 19639 SDValue Address =
 19640 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
 19641 DAG.getIntPtrConstant(HiVA.getLocMemOffset(), DL));
 19642 // Emit the store.
 19643 MemOpChains.push_back(DAG.getStore(
 19644 Chain, DL, Hi, Address,
 19646 } else {
 19647 // Second half of f64 is passed in another GPR.
 19648 Register RegHigh = HiVA.getLocReg();
 19649 RegsToPass.push_back(std::make_pair(RegHigh, Hi));
 19650 }
 19651 continue;
 19652 }
19653
 19654 // Promote the value if needed.
 19655 // For now, only handle fully promoted and indirect arguments.
 19656 if (VA.getLocInfo() == CCValAssign::Indirect) {
 19657 // Store the argument in a stack slot and pass its address.
 19658 Align StackAlign =
 19659 std::max(getPrefTypeAlign(Outs[OutIdx].ArgVT, DAG),
 19660 getPrefTypeAlign(ArgValue.getValueType(), DAG));
 19661 TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
 19662 // If the original argument was split (e.g. i128), we need
 19663 // to store the required parts of it here (and pass just one address).
 19664 // Vectors may be partly split to registers and partly to the stack, in
 19665 // which case the base address is partly offset and subsequent stores are
 19666 // relative to that.
 19667 unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
 19668 unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
 19669 assert(VA.getValVT().isVector() || ArgPartOffset == 0);
 19670 // Calculate the total size to store. We don't have access to what we're
 19671 // actually storing other than performing the loop and collecting the
 19672 // info.
 19674 while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
 19675 SDValue PartValue = OutVals[OutIdx + 1];
 19676 unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
 19677 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
 19678 EVT PartVT = PartValue.getValueType();
 19679 if (PartVT.isScalableVector())
 19680 Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
 19681 StoredSize += PartVT.getStoreSize();
 19682 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
 19683 Parts.push_back(std::make_pair(PartValue, Offset));
 19684 ++i;
 19685 ++OutIdx;
 19686 }
 19687 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
 19688 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
 19689 MemOpChains.push_back(
 19690 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
// Store each additional part at its (possibly VSCALE-scaled) offset from
// the spill slot base.
 19692 for (const auto &Part : Parts) {
 19693 SDValue PartValue = Part.first;
 19694 SDValue PartOffset = Part.second;
 19695 SDValue Address =
 19696 DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
 19697 MemOpChains.push_back(
 19698 DAG.getStore(Chain, DL, PartValue, Address,
 19700 }
 19701 ArgValue = SpillSlot;
 19702 } else {
 19703 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL, Subtarget);
 19704 }
19705
 19706 // Use local copy if it is a byval arg.
 19707 if (Flags.isByVal())
 19708 ArgValue = ByValArgs[j++];
19709
 19710 if (VA.isRegLoc()) {
 19711 // Queue up the argument copies and emit them at the end.
 19712 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
 19713 } else {
 19714 assert(VA.isMemLoc() && "Argument not register or memory");
 19715 assert(!IsTailCall && "Tail call not allowed if stack is used "
 19716 "for passing parameters");
19717
 19718 // Work out the address of the stack slot.
 19719 if (!StackPtr.getNode())
 19720 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
 19721 SDValue Address =
 19722 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
19724
 19725 // Emit the store.
 19726 MemOpChains.push_back(
 19727 DAG.getStore(Chain, DL, ArgValue, Address,
 19729 }
 19730 }
19731
 19732 // Join the stores, which are independent of one another.
 19733 if (!MemOpChains.empty())
 19734 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
19735
 19736 SDValue Glue;
19737
 19738 // Build a sequence of copy-to-reg nodes, chained and glued together.
 19739 for (auto &Reg : RegsToPass) {
 19740 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
 19741 Glue = Chain.getValue(1);
 19742 }
19743
 19744 // Validate that none of the argument registers have been marked as
 19745 // reserved, if so report an error. Do the same for the return address if this
 19746 // is not a tailcall.
 19747 validateCCReservedRegs(RegsToPass, MF);
 19748 if (!IsTailCall &&
 19751 MF.getFunction(),
 19752 "Return address register required, but has been reserved."});
19753
 19754 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
 19755 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
 19756 // split it and then direct call can be matched by PseudoCALL.
// NOTE(review): the head of the branch below (presumably the large
// code-model check) is missing from this listing.
 19757 bool CalleeIsLargeExternalSymbol = false;
 19759 if (auto *S = dyn_cast<GlobalAddressSDNode>(Callee))
 19760 Callee = getLargeGlobalAddress(S, DL, PtrVT, DAG);
 19761 else if (auto *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
 19762 Callee = getLargeExternalSymbol(S, DL, PtrVT, DAG);
 19763 CalleeIsLargeExternalSymbol = true;
 19764 }
 19765 } else if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
 19766 const GlobalValue *GV = S->getGlobal();
 19767 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, RISCVII::MO_CALL);
 19768 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
 19769 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, RISCVII::MO_CALL);
 19770 }
19771
 19772 // The first call operand is the chain and the second is the target address.
 19774 Ops.push_back(Chain);
 19775 Ops.push_back(Callee);
19776
 19777 // Add argument registers to the end of the list so that they are
 19778 // known live into the call.
 19779 for (auto &Reg : RegsToPass)
 19780 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
19781
 19782 if (!IsTailCall) {
 19783 // Add a register mask operand representing the call-preserved registers.
 19784 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
 19785 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
 19786 assert(Mask && "Missing call preserved mask for calling convention");
 19787 Ops.push_back(DAG.getRegisterMask(Mask));
 19788 }
19789
 19790 // Glue the call to the argument copies, if any.
 19791 if (Glue.getNode())
 19792 Ops.push_back(Glue);
19793
 19794 assert((!CLI.CFIType || CLI.CB->isIndirectCall()) &&
 19795 "Unexpected CFI type for a direct call");
19796
 19797 // Emit the call.
 19798 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
19799
 19800 // Use software guarded branch for large code model non-indirect calls
 19801 // Tail call to external symbol will have a null CLI.CB and we need another
 19802 // way to determine the callsite type
// NOTE(review): the head of the condition below is missing from this
// listing; the visible operands test Zicfilp plus direct-call-ness.
 19803 bool NeedSWGuarded = false;
 19805 Subtarget.hasStdExtZicfilp() &&
 19806 ((CLI.CB && !CLI.CB->isIndirectCall()) || CalleeIsLargeExternalSymbol))
 19807 NeedSWGuarded = true;
19808
 19809 if (IsTailCall) {
 19811 unsigned CallOpc =
 19812 NeedSWGuarded ? RISCVISD::SW_GUARDED_TAIL : RISCVISD::TAIL;
 19813 SDValue Ret = DAG.getNode(CallOpc, DL, NodeTys, Ops);
 19814 if (CLI.CFIType)
 19815 Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
 19816 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
 19817 return Ret;
 19818 }
19819
 19820 unsigned CallOpc = NeedSWGuarded ? RISCVISD::SW_GUARDED_CALL : RISCVISD::CALL;
 19821 Chain = DAG.getNode(CallOpc, DL, NodeTys, Ops);
 19822 if (CLI.CFIType)
 19823 Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());
 19824 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
 19825 Glue = Chain.getValue(1);
19826
 19827 // Mark the end of the call, which is glued to the call itself.
 19828 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
 19829 Glue = Chain.getValue(1);
19830
 19831 // Assign locations to each value returned by this call.
 19833 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
 19834 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_RISCV);
19835
 19836 // Copy all of the result registers out of their specified physreg.
 19837 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
 19838 auto &VA = RVLocs[i];
 19839 // Copy the value out
 19840 SDValue RetValue =
 19841 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
 19842 // Glue the RetValue to the end of the call sequence
 19843 Chain = RetValue.getValue(1);
 19844 Glue = RetValue.getValue(2);
19845
// f64 returned as two i32 halves on RV32 soft-float: fetch the second half
// from the next location and rebuild the f64.
 19846 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
 19847 assert(VA.needsCustom());
 19848 SDValue RetValue2 = DAG.getCopyFromReg(Chain, DL, RVLocs[++i].getLocReg(),
 19849 MVT::i32, Glue);
 19850 Chain = RetValue2.getValue(1);
 19851 Glue = RetValue2.getValue(2);
 19852 RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
 19853 RetValue2);
 19854 } else
 19855 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL, Subtarget);
19856
 19857 InVals.push_back(RetValue);
 19858 }
19859
 19860 return Chain;
 19861 }
19862
// Return true if every return value can be assigned a register location
// under the standard RISC-V return convention — i.e. the return does not
// need an sret-style demotion.
// NOTE(review): the declarator line and the RVLocs declaration are missing
// from this listing — confirm against upstream.
 19864 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
 19865 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
 19867 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
19868
 19869 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
 19870 MVT VT = Outs[i].VT;
 19871 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
// A nonzero result from CC_RISCV means the value could not be assigned.
 19872 if (CC_RISCV(i, VT, VT, CCValAssign::Full, ArgFlags, CCInfo,
 19873 /*IsFixed=*/true, /*IsRet=*/true, nullptr))
 19874 return false;
 19875 }
 19876 return true;
 19877 }
19878
// Lower the return of the current function: assign each return value a
// register location, copy the values into those registers (splitting f64
// into two i32 copies on RV32 soft-float), and emit the matching
// RET/SRET/MRET glue node.
// NOTE(review): several lines are missing from this listing (the Outs
// parameter line, the MF and RVLocs declarations, and the heads of the
// diagnose calls) — confirm against upstream before editing code here.
 19879 SDValue
 19880 RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
 19881 bool IsVarArg,
 19883 const SmallVectorImpl<SDValue> &OutVals,
 19884 const SDLoc &DL, SelectionDAG &DAG) const {
 19886 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
19887
 19888 // Stores the assignment of the return value to a location.
19890
 19891 // Info about the registers and stack slot.
 19892 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
 19893 *DAG.getContext());
19894
 19895 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
 19896 nullptr, CC_RISCV);
19897
 19898 if (CallConv == CallingConv::GHC && !RVLocs.empty())
 19899 report_fatal_error("GHC functions return void only");
19900
 19901 SDValue Glue;
// RetOps[0] is a placeholder for the chain, updated after all copies.
 19902 SmallVector<SDValue, 4> RetOps(1, Chain);
19903
 19904 // Copy the result values into the output registers.
 19905 for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) {
 19906 SDValue Val = OutVals[OutIdx];
 19907 CCValAssign &VA = RVLocs[i];
 19908 assert(VA.isRegLoc() && "Can only return in registers!");
19909
 19910 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
 19911 // Handle returning f64 on RV32D with a soft float ABI.
 19912 assert(VA.isRegLoc() && "Expected return via registers");
 19913 assert(VA.needsCustom());
 19914 SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
 19915 DAG.getVTList(MVT::i32, MVT::i32), Val);
 19916 SDValue Lo = SplitF64.getValue(0);
 19917 SDValue Hi = SplitF64.getValue(1);
 19918 Register RegLo = VA.getLocReg();
// The high half consumes the next return-value location.
 19919 Register RegHi = RVLocs[++i].getLocReg();
19920
 19921 if (STI.isRegisterReservedByUser(RegLo) ||
 19922 STI.isRegisterReservedByUser(RegHi))
 19924 MF.getFunction(),
 19925 "Return value register required, but has been reserved."});
19926
 19927 Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
 19928 Glue = Chain.getValue(1);
 19929 RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
 19930 Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
 19931 Glue = Chain.getValue(1);
 19932 RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
 19933 } else {
 19934 // Handle a 'normal' return.
 19935 Val = convertValVTToLocVT(DAG, Val, VA, DL, Subtarget);
 19936 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
19937
 19938 if (STI.isRegisterReservedByUser(VA.getLocReg()))
 19940 MF.getFunction(),
 19941 "Return value register required, but has been reserved."});
19942
 19943 // Guarantee that all emitted copies are stuck together.
 19944 Glue = Chain.getValue(1);
 19945 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
 19946 }
 19947 }
19948
 19949 RetOps[0] = Chain; // Update chain.
19950
 19951 // Add the glue node if we have it.
 19952 if (Glue.getNode()) {
 19953 RetOps.push_back(Glue);
 19954 }
19955
// NOTE(review): the body of this any_of guard (presumably marking the
// function as vector-using) is missing from this listing.
 19956 if (any_of(RVLocs,
 19957 [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
19959
 19960 unsigned RetOpc = RISCVISD::RET_GLUE;
 19961 // Interrupt service routines use different return instructions.
 19962 const Function &Func = DAG.getMachineFunction().getFunction();
 19963 if (Func.hasFnAttribute("interrupt")) {
 19964 if (!Func.getReturnType()->isVoidTy())
 19966 "Functions with the interrupt attribute must have void return type!");
19967
 19969 StringRef Kind =
 19970 MF.getFunction().getFnAttribute("interrupt").getValueAsString();
19971
// "supervisor" uses SRET; every other accepted kind returns with MRET.
 19972 if (Kind == "supervisor")
 19973 RetOpc = RISCVISD::SRET_GLUE;
 19974 else
 19975 RetOpc = RISCVISD::MRET_GLUE;
 19976 }
19977
 19978 return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
 19979 }
19980
// Emit a diagnostic (at most one) if any register chosen for argument
// passing has been reserved by the user via -ffixed-<reg>.
 19981 void RISCVTargetLowering::validateCCReservedRegs(
 19982 const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
 19983 MachineFunction &MF) const {
 19984 const Function &F = MF.getFunction();
 19985 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
19986
// Regs pairs each assigned physical register with the value copied into it;
// only the register half matters here.
 19987 if (llvm::any_of(Regs, [&STI](auto Reg) {
 19988 return STI.isRegisterReservedByUser(Reg.first);
 19989 }))
 19990 F.getContext().diagnose(DiagnosticInfoUnsupported{
 19991 F, "Argument register required, but has been reserved."});
 19992 }
19993
19994 // Check if the result of the node is only used as a return value, as
19995 // otherwise we can't perform a tail-call.
// On success, Chain is updated to the chain operand of the CopyToReg that
// feeds the return.
// NOTE(review): the declarator line of this override is missing from this
// listing — confirm against upstream.
 19997 if (N->getNumValues() != 1)
 19998 return false;
 19999 if (!N->hasNUsesOfValue(1, 0))
 20000 return false;
20001
 20002 SDNode *Copy = *N->use_begin();
20003
// Look through a bitcast between the node and the copy to the return
// register.
 20004 if (Copy->getOpcode() == ISD::BITCAST) {
 20005 return isUsedByReturnOnly(Copy, Chain);
 20006 }
20007
 20008 // TODO: Handle additional opcodes in order to support tail-calling libcalls
 20009 // with soft float ABIs.
 20010 if (Copy->getOpcode() != ISD::CopyToReg) {
 20011 return false;
 20012 }
20013
 20014 // If the ISD::CopyToReg has a glue operand, we conservatively assume it
 20015 // isn't safe to perform a tail call.
 20016 if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() == MVT::Glue)
 20017 return false;
20018
 20019 // The copy must be used by a RISCVISD::RET_GLUE, and nothing else.
 20020 bool HasRet = false;
 20021 for (SDNode *Node : Copy->uses()) {
 20022 if (Node->getOpcode() != RISCVISD::RET_GLUE)
 20023 return false;
 20024 HasRet = true;
 20025 }
 20026 if (!HasRet)
 20027 return false;
20028
 20029 Chain = Copy->getOperand(0);
 20030 return true;
 20031 }
20032
20034 return CI->isTailCall();
20035}
20036
20037const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
20038#define NODE_NAME_CASE(NODE) \
20039 case RISCVISD::NODE: \
20040 return "RISCVISD::" #NODE;
20041 // clang-format off
20042 switch ((RISCVISD::NodeType)Opcode) {
20044 break;
20045 NODE_NAME_CASE(RET_GLUE)
20046 NODE_NAME_CASE(SRET_GLUE)
20047 NODE_NAME_CASE(MRET_GLUE)
20048 NODE_NAME_CASE(CALL)
20049 NODE_NAME_CASE(SELECT_CC)
20050 NODE_NAME_CASE(BR_CC)
20051 NODE_NAME_CASE(BuildPairF64)
20052 NODE_NAME_CASE(SplitF64)
20053 NODE_NAME_CASE(TAIL)
20054 NODE_NAME_CASE(ADD_LO)
20055 NODE_NAME_CASE(HI)
20056 NODE_NAME_CASE(LLA)
20057 NODE_NAME_CASE(ADD_TPREL)
20058 NODE_NAME_CASE(MULHSU)
20059 NODE_NAME_CASE(SHL_ADD)
20060 NODE_NAME_CASE(SLLW)
20061 NODE_NAME_CASE(SRAW)
20062 NODE_NAME_CASE(SRLW)
20063 NODE_NAME_CASE(DIVW)
20064 NODE_NAME_CASE(DIVUW)
20065 NODE_NAME_CASE(REMUW)
20066 NODE_NAME_CASE(ROLW)
20067 NODE_NAME_CASE(RORW)
20068 NODE_NAME_CASE(CLZW)
20069 NODE_NAME_CASE(CTZW)
20070 NODE_NAME_CASE(ABSW)
20071 NODE_NAME_CASE(FMV_H_X)
20072 NODE_NAME_CASE(FMV_X_ANYEXTH)
20073 NODE_NAME_CASE(FMV_X_SIGNEXTH)
20074 NODE_NAME_CASE(FMV_W_X_RV64)
20075 NODE_NAME_CASE(FMV_X_ANYEXTW_RV64)
20076 NODE_NAME_CASE(FCVT_X)
20077 NODE_NAME_CASE(FCVT_XU)
20078 NODE_NAME_CASE(FCVT_W_RV64)
20079 NODE_NAME_CASE(FCVT_WU_RV64)
20080 NODE_NAME_CASE(STRICT_FCVT_W_RV64)
20081 NODE_NAME_CASE(STRICT_FCVT_WU_RV64)
20082 NODE_NAME_CASE(FROUND)
20083 NODE_NAME_CASE(FCLASS)
20084 NODE_NAME_CASE(FSGNJX)
20085 NODE_NAME_CASE(FMAX)
20086 NODE_NAME_CASE(FMIN)
20087 NODE_NAME_CASE(FLI)
20088 NODE_NAME_CASE(READ_COUNTER_WIDE)
20089 NODE_NAME_CASE(BREV8)
20090 NODE_NAME_CASE(ORC_B)
20091 NODE_NAME_CASE(ZIP)
20092 NODE_NAME_CASE(UNZIP)
20093 NODE_NAME_CASE(CLMUL)
20094 NODE_NAME_CASE(CLMULH)
20095 NODE_NAME_CASE(CLMULR)
20096 NODE_NAME_CASE(MOPR)
20097 NODE_NAME_CASE(MOPRR)
20098 NODE_NAME_CASE(SHA256SIG0)
20099 NODE_NAME_CASE(SHA256SIG1)
20100 NODE_NAME_CASE(SHA256SUM0)
20101 NODE_NAME_CASE(SHA256SUM1)
20102 NODE_NAME_CASE(SM4KS)
20103 NODE_NAME_CASE(SM4ED)
20104 NODE_NAME_CASE(SM3P0)
20105 NODE_NAME_CASE(SM3P1)
20106 NODE_NAME_CASE(TH_LWD)
20107 NODE_NAME_CASE(TH_LWUD)
20108 NODE_NAME_CASE(TH_LDD)
20109 NODE_NAME_CASE(TH_SWD)
20110 NODE_NAME_CASE(TH_SDD)
20111 NODE_NAME_CASE(VMV_V_V_VL)
20112 NODE_NAME_CASE(VMV_V_X_VL)
20113 NODE_NAME_CASE(VFMV_V_F_VL)
20114 NODE_NAME_CASE(VMV_X_S)
20115 NODE_NAME_CASE(VMV_S_X_VL)
20116 NODE_NAME_CASE(VFMV_S_F_VL)
20117 NODE_NAME_CASE(SPLAT_VECTOR_SPLIT_I64_VL)
20118 NODE_NAME_CASE(READ_VLENB)
20119 NODE_NAME_CASE(TRUNCATE_VECTOR_VL)
20120 NODE_NAME_CASE(TRUNCATE_VECTOR_VL_SSAT)
20121 NODE_NAME_CASE(TRUNCATE_VECTOR_VL_USAT)
20122 NODE_NAME_CASE(VSLIDEUP_VL)
20123 NODE_NAME_CASE(VSLIDE1UP_VL)
20124 NODE_NAME_CASE(VSLIDEDOWN_VL)
20125 NODE_NAME_CASE(VSLIDE1DOWN_VL)
20126 NODE_NAME_CASE(VFSLIDE1UP_VL)
20127 NODE_NAME_CASE(VFSLIDE1DOWN_VL)
20128 NODE_NAME_CASE(VID_VL)
20129 NODE_NAME_CASE(VFNCVT_ROD_VL)
20130 NODE_NAME_CASE(VECREDUCE_ADD_VL)
20131 NODE_NAME_CASE(VECREDUCE_UMAX_VL)
20132 NODE_NAME_CASE(VECREDUCE_SMAX_VL)
20133 NODE_NAME_CASE(VECREDUCE_UMIN_VL)
20134 NODE_NAME_CASE(VECREDUCE_SMIN_VL)
20135 NODE_NAME_CASE(VECREDUCE_AND_VL)
20136 NODE_NAME_CASE(VECREDUCE_OR_VL)
20137 NODE_NAME_CASE(VECREDUCE_XOR_VL)
20138 NODE_NAME_CASE(VECREDUCE_FADD_VL)
20139 NODE_NAME_CASE(VECREDUCE_SEQ_FADD_VL)
20140 NODE_NAME_CASE(VECREDUCE_FMIN_VL)
20141 NODE_NAME_CASE(VECREDUCE_FMAX_VL)
20142 NODE_NAME_CASE(ADD_VL)
20143 NODE_NAME_CASE(AND_VL)
20144 NODE_NAME_CASE(MUL_VL)
20145 NODE_NAME_CASE(OR_VL)
20146 NODE_NAME_CASE(SDIV_VL)
20147 NODE_NAME_CASE(SHL_VL)
20148 NODE_NAME_CASE(SREM_VL)
20149 NODE_NAME_CASE(SRA_VL)
20150 NODE_NAME_CASE(SRL_VL)
20151 NODE_NAME_CASE(ROTL_VL)
20152 NODE_NAME_CASE(ROTR_VL)
20153 NODE_NAME_CASE(SUB_VL)
20154 NODE_NAME_CASE(UDIV_VL)
20155 NODE_NAME_CASE(UREM_VL)
20156 NODE_NAME_CASE(XOR_VL)
20157 NODE_NAME_CASE(AVGFLOORS_VL)
20158 NODE_NAME_CASE(AVGFLOORU_VL)
20159 NODE_NAME_CASE(AVGCEILS_VL)
20160 NODE_NAME_CASE(AVGCEILU_VL)
20161 NODE_NAME_CASE(SADDSAT_VL)
20162 NODE_NAME_CASE(UADDSAT_VL)
20163 NODE_NAME_CASE(SSUBSAT_VL)
20164 NODE_NAME_CASE(USUBSAT_VL)
20165 NODE_NAME_CASE(FADD_VL)
20166 NODE_NAME_CASE(FSUB_VL)
20167 NODE_NAME_CASE(FMUL_VL)
20168 NODE_NAME_CASE(FDIV_VL)
20169 NODE_NAME_CASE(FNEG_VL)
20170 NODE_NAME_CASE(FABS_VL)
20171 NODE_NAME_CASE(FSQRT_VL)
20172 NODE_NAME_CASE(FCLASS_VL)
20173 NODE_NAME_CASE(VFMADD_VL)
20174 NODE_NAME_CASE(VFNMADD_VL)
20175 NODE_NAME_CASE(VFMSUB_VL)
20176 NODE_NAME_CASE(VFNMSUB_VL)
20177 NODE_NAME_CASE(VFWMADD_VL)
20178 NODE_NAME_CASE(VFWNMADD_VL)
20179 NODE_NAME_CASE(VFWMSUB_VL)
20180 NODE_NAME_CASE(VFWNMSUB_VL)
20181 NODE_NAME_CASE(FCOPYSIGN_VL)
20182 NODE_NAME_CASE(SMIN_VL)
20183 NODE_NAME_CASE(SMAX_VL)
20184 NODE_NAME_CASE(UMIN_VL)
20185 NODE_NAME_CASE(UMAX_VL)
20186 NODE_NAME_CASE(BITREVERSE_VL)
20187 NODE_NAME_CASE(BSWAP_VL)
20188 NODE_NAME_CASE(CTLZ_VL)
20189 NODE_NAME_CASE(CTTZ_VL)
20190 NODE_NAME_CASE(CTPOP_VL)
20191 NODE_NAME_CASE(VFMIN_VL)
20192 NODE_NAME_CASE(VFMAX_VL)
20193 NODE_NAME_CASE(MULHS_VL)
20194 NODE_NAME_CASE(MULHU_VL)
20195 NODE_NAME_CASE(VFCVT_RTZ_X_F_VL)
20196 NODE_NAME_CASE(VFCVT_RTZ_XU_F_VL)
20197 NODE_NAME_CASE(VFCVT_RM_X_F_VL)
20198 NODE_NAME_CASE(VFCVT_RM_XU_F_VL)
20199 NODE_NAME_CASE(VFCVT_X_F_VL)
20200 NODE_NAME_CASE(VFCVT_XU_F_VL)
20201 NODE_NAME_CASE(VFROUND_NOEXCEPT_VL)
20202 NODE_NAME_CASE(SINT_TO_FP_VL)
20203 NODE_NAME_CASE(UINT_TO_FP_VL)
20204 NODE_NAME_CASE(VFCVT_RM_F_XU_VL)
20205 NODE_NAME_CASE(VFCVT_RM_F_X_VL)
20206 NODE_NAME_CASE(FP_EXTEND_VL)
20207 NODE_NAME_CASE(FP_ROUND_VL)
20208 NODE_NAME_CASE(STRICT_FADD_VL)
20209 NODE_NAME_CASE(STRICT_FSUB_VL)
20210 NODE_NAME_CASE(STRICT_FMUL_VL)
20211 NODE_NAME_CASE(STRICT_FDIV_VL)
20212 NODE_NAME_CASE(STRICT_FSQRT_VL)
20213 NODE_NAME_CASE(STRICT_VFMADD_VL)
20214 NODE_NAME_CASE(STRICT_VFNMADD_VL)
20215 NODE_NAME_CASE(STRICT_VFMSUB_VL)
20216 NODE_NAME_CASE(STRICT_VFNMSUB_VL)
20217 NODE_NAME_CASE(STRICT_FP_ROUND_VL)
20218 NODE_NAME_CASE(STRICT_FP_EXTEND_VL)
20219 NODE_NAME_CASE(STRICT_VFNCVT_ROD_VL)
20220 NODE_NAME_CASE(STRICT_SINT_TO_FP_VL)
20221 NODE_NAME_CASE(STRICT_UINT_TO_FP_VL)
20222 NODE_NAME_CASE(STRICT_VFCVT_RM_X_F_VL)
20223 NODE_NAME_CASE(STRICT_VFCVT_RTZ_X_F_VL)
20224 NODE_NAME_CASE(STRICT_VFCVT_RTZ_XU_F_VL)
20225 NODE_NAME_CASE(STRICT_FSETCC_VL)
20226 NODE_NAME_CASE(STRICT_FSETCCS_VL)
20227 NODE_NAME_CASE(STRICT_VFROUND_NOEXCEPT_VL)
20228 NODE_NAME_CASE(VWMUL_VL)
20229 NODE_NAME_CASE(VWMULU_VL)
20230 NODE_NAME_CASE(VWMULSU_VL)
20231 NODE_NAME_CASE(VWADD_VL)
20232 NODE_NAME_CASE(VWADDU_VL)
20233 NODE_NAME_CASE(VWSUB_VL)
20234 NODE_NAME_CASE(VWSUBU_VL)
20235 NODE_NAME_CASE(VWADD_W_VL)
20236 NODE_NAME_CASE(VWADDU_W_VL)
20237 NODE_NAME_CASE(VWSUB_W_VL)
20238 NODE_NAME_CASE(VWSUBU_W_VL)
20239 NODE_NAME_CASE(VWSLL_VL)
20240 NODE_NAME_CASE(VFWMUL_VL)
20241 NODE_NAME_CASE(VFWADD_VL)
20242 NODE_NAME_CASE(VFWSUB_VL)
20243 NODE_NAME_CASE(VFWADD_W_VL)
20244 NODE_NAME_CASE(VFWSUB_W_VL)
20245 NODE_NAME_CASE(VWMACC_VL)
20246 NODE_NAME_CASE(VWMACCU_VL)
20247 NODE_NAME_CASE(VWMACCSU_VL)
20248 NODE_NAME_CASE(VNSRL_VL)
20249 NODE_NAME_CASE(SETCC_VL)
20250 NODE_NAME_CASE(VMERGE_VL)
20251 NODE_NAME_CASE(VMAND_VL)
20252 NODE_NAME_CASE(VMOR_VL)
20253 NODE_NAME_CASE(VMXOR_VL)
20254 NODE_NAME_CASE(VMCLR_VL)
20255 NODE_NAME_CASE(VMSET_VL)
20256 NODE_NAME_CASE(VRGATHER_VX_VL)
20257 NODE_NAME_CASE(VRGATHER_VV_VL)
20258 NODE_NAME_CASE(VRGATHEREI16_VV_VL)
20259 NODE_NAME_CASE(VSEXT_VL)
20260 NODE_NAME_CASE(VZEXT_VL)
20261 NODE_NAME_CASE(VCPOP_VL)
20262 NODE_NAME_CASE(VFIRST_VL)
20263 NODE_NAME_CASE(READ_CSR)
20264 NODE_NAME_CASE(WRITE_CSR)
20265 NODE_NAME_CASE(SWAP_CSR)
20266 NODE_NAME_CASE(CZERO_EQZ)
20267 NODE_NAME_CASE(CZERO_NEZ)
20268 NODE_NAME_CASE(SW_GUARDED_BRIND)
20269 NODE_NAME_CASE(SW_GUARDED_CALL)
20270 NODE_NAME_CASE(SW_GUARDED_TAIL)
20271 NODE_NAME_CASE(TUPLE_INSERT)
20272 NODE_NAME_CASE(TUPLE_EXTRACT)
20273 NODE_NAME_CASE(SF_VC_XV_SE)
20274 NODE_NAME_CASE(SF_VC_IV_SE)
20275 NODE_NAME_CASE(SF_VC_VV_SE)
20276 NODE_NAME_CASE(SF_VC_FV_SE)
20277 NODE_NAME_CASE(SF_VC_XVV_SE)
20278 NODE_NAME_CASE(SF_VC_IVV_SE)
20279 NODE_NAME_CASE(SF_VC_VVV_SE)
20280 NODE_NAME_CASE(SF_VC_FVV_SE)
20281 NODE_NAME_CASE(SF_VC_XVW_SE)
20282 NODE_NAME_CASE(SF_VC_IVW_SE)
20283 NODE_NAME_CASE(SF_VC_VVW_SE)
20284 NODE_NAME_CASE(SF_VC_FVW_SE)
20285 NODE_NAME_CASE(SF_VC_V_X_SE)
20286 NODE_NAME_CASE(SF_VC_V_I_SE)
20287 NODE_NAME_CASE(SF_VC_V_XV_SE)
20288 NODE_NAME_CASE(SF_VC_V_IV_SE)
20289 NODE_NAME_CASE(SF_VC_V_VV_SE)
20290 NODE_NAME_CASE(SF_VC_V_FV_SE)
20291 NODE_NAME_CASE(SF_VC_V_XVV_SE)
20292 NODE_NAME_CASE(SF_VC_V_IVV_SE)
20293 NODE_NAME_CASE(SF_VC_V_VVV_SE)
20294 NODE_NAME_CASE(SF_VC_V_FVV_SE)
20295 NODE_NAME_CASE(SF_VC_V_XVW_SE)
20296 NODE_NAME_CASE(SF_VC_V_IVW_SE)
20297 NODE_NAME_CASE(SF_VC_V_VVW_SE)
20298 NODE_NAME_CASE(SF_VC_V_FVW_SE)
20299 }
20300 // clang-format on
20301 return nullptr;
20302#undef NODE_NAME_CASE
20303}
20304
20305/// getConstraintType - Given a constraint letter, return the type of
20306/// constraint it is for this target.
20309 if (Constraint.size() == 1) {
20310 switch (Constraint[0]) {
20311 default:
20312 break;
20313 case 'f':
20314 return C_RegisterClass;
20315 case 'I':
20316 case 'J':
20317 case 'K':
20318 return C_Immediate;
20319 case 'A':
20320 return C_Memory;
20321 case 's':
20322 case 'S': // A symbolic address
20323 return C_Other;
20324 }
20325 } else {
20326 if (Constraint == "vr" || Constraint == "vm")
20327 return C_RegisterClass;
20328 }
20329 return TargetLowering::getConstraintType(Constraint);
20330}
20331
20332std::pair<unsigned, const TargetRegisterClass *>
20334 StringRef Constraint,
20335 MVT VT) const {
20336 // First, see if this is a constraint that directly corresponds to a RISC-V
20337 // register class.
20338 if (Constraint.size() == 1) {
20339 switch (Constraint[0]) {
20340 case 'r':
20341 // TODO: Support fixed vectors up to XLen for P extension?
20342 if (VT.isVector())
20343 break;
20344 if (VT == MVT::f16 && Subtarget.hasStdExtZhinxmin())
20345 return std::make_pair(0U, &RISCV::GPRF16RegClass);
20346 if (VT == MVT::f32 && Subtarget.hasStdExtZfinx())
20347 return std::make_pair(0U, &RISCV::GPRF32RegClass);
20348 if (VT == MVT::f64 && Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
20349 return std::make_pair(0U, &RISCV::GPRPairRegClass);
20350 return std::make_pair(0U, &RISCV::GPRNoX0RegClass);
20351 case 'f':
20352 if (Subtarget.hasStdExtZfhmin() && VT == MVT::f16)
20353 return std::make_pair(0U, &RISCV::FPR16RegClass);
20354 if (Subtarget.hasStdExtF() && VT == MVT::f32)
20355 return std::make_pair(0U, &RISCV::FPR32RegClass);
20356 if (Subtarget.hasStdExtD() && VT == MVT::f64)
20357 return std::make_pair(0U, &RISCV::FPR64RegClass);
20358 break;
20359 default:
20360 break;
20361 }
20362 } else if (Constraint == "vr") {
20363 for (const auto *RC :
20364 {&RISCV::VRRegClass, &RISCV::VRM2RegClass, &RISCV::VRM4RegClass,
20365 &RISCV::VRM8RegClass, &RISCV::VRN2M1RegClass, &RISCV::VRN3M1RegClass,
20366 &RISCV::VRN4M1RegClass, &RISCV::VRN5M1RegClass,
20367 &RISCV::VRN6M1RegClass, &RISCV::VRN7M1RegClass,
20368 &RISCV::VRN8M1RegClass, &RISCV::VRN2M2RegClass,
20369 &RISCV::VRN3M2RegClass, &RISCV::VRN4M2RegClass,
20370 &RISCV::VRN2M4RegClass}) {
20371 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy))
20372 return std::make_pair(0U, RC);
20373 }
20374 } else if (Constraint == "vm") {
20375 if (TRI->isTypeLegalForClass(RISCV::VMV0RegClass, VT.SimpleTy))
20376 return std::make_pair(0U, &RISCV::VMV0RegClass);
20377 }
20378
20379 // Clang will correctly decode the usage of register name aliases into their
20380 // official names. However, other frontends like `rustc` do not. This allows
20381 // users of these frontends to use the ABI names for registers in LLVM-style
20382 // register constraints.
20383 unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower())
20384 .Case("{zero}", RISCV::X0)
20385 .Case("{ra}", RISCV::X1)
20386 .Case("{sp}", RISCV::X2)
20387 .Case("{gp}", RISCV::X3)
20388 .Case("{tp}", RISCV::X4)
20389 .Case("{t0}", RISCV::X5)
20390 .Case("{t1}", RISCV::X6)
20391 .Case("{t2}", RISCV::X7)
20392 .Cases("{s0}", "{fp}", RISCV::X8)
20393 .Case("{s1}", RISCV::X9)
20394 .Case("{a0}", RISCV::X10)
20395 .Case("{a1}", RISCV::X11)
20396 .Case("{a2}", RISCV::X12)
20397 .Case("{a3}", RISCV::X13)
20398 .Case("{a4}", RISCV::X14)
20399 .Case("{a5}", RISCV::X15)
20400 .Case("{a6}", RISCV::X16)
20401 .Case("{a7}", RISCV::X17)
20402 .Case("{s2}", RISCV::X18)
20403 .Case("{s3}", RISCV::X19)
20404 .Case("{s4}", RISCV::X20)
20405 .Case("{s5}", RISCV::X21)
20406 .Case("{s6}", RISCV::X22)
20407 .Case("{s7}", RISCV::X23)
20408 .Case("{s8}", RISCV::X24)
20409 .Case("{s9}", RISCV::X25)
20410 .Case("{s10}", RISCV::X26)
20411 .Case("{s11}", RISCV::X27)
20412 .Case("{t3}", RISCV::X28)
20413 .Case("{t4}", RISCV::X29)
20414 .Case("{t5}", RISCV::X30)
20415 .Case("{t6}", RISCV::X31)
20416 .Default(RISCV::NoRegister);
20417 if (XRegFromAlias != RISCV::NoRegister)
20418 return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass);
20419
20420 // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the
20421 // TableGen record rather than the AsmName to choose registers for InlineAsm
20422 // constraints, plus we want to match those names to the widest floating point
20423 // register type available, manually select floating point registers here.
20424 //
20425 // The second case is the ABI name of the register, so that frontends can also
20426 // use the ABI names in register constraint lists.
20427 if (Subtarget.hasStdExtF()) {
20428 unsigned FReg = StringSwitch<unsigned>(Constraint.lower())
20429 .Cases("{f0}", "{ft0}", RISCV::F0_F)
20430 .Cases("{f1}", "{ft1}", RISCV::F1_F)
20431 .Cases("{f2}", "{ft2}", RISCV::F2_F)
20432 .Cases("{f3}", "{ft3}", RISCV::F3_F)
20433 .Cases("{f4}", "{ft4}", RISCV::F4_F)
20434 .Cases("{f5}", "{ft5}", RISCV::F5_F)
20435 .Cases("{f6}", "{ft6}", RISCV::F6_F)
20436 .Cases("{f7}", "{ft7}", RISCV::F7_F)
20437 .Cases("{f8}", "{fs0}", RISCV::F8_F)
20438 .Cases("{f9}", "{fs1}", RISCV::F9_F)
20439 .Cases("{f10}", "{fa0}", RISCV::F10_F)
20440 .Cases("{f11}", "{fa1}", RISCV::F11_F)
20441 .Cases("{f12}", "{fa2}", RISCV::F12_F)
20442 .Cases("{f13}", "{fa3}", RISCV::F13_F)
20443 .Cases("{f14}", "{fa4}", RISCV::F14_F)
20444 .Cases("{f15}", "{fa5}", RISCV::F15_F)
20445 .Cases("{f16}", "{fa6}", RISCV::F16_F)
20446 .Cases("{f17}", "{fa7}", RISCV::F17_F)
20447 .Cases("{f18}", "{fs2}", RISCV::F18_F)
20448 .Cases("{f19}", "{fs3}", RISCV::F19_F)
20449 .Cases("{f20}", "{fs4}", RISCV::F20_F)
20450 .Cases("{f21}", "{fs5}", RISCV::F21_F)
20451 .Cases("{f22}", "{fs6}", RISCV::F22_F)
20452 .Cases("{f23}", "{fs7}", RISCV::F23_F)
20453 .Cases("{f24}", "{fs8}", RISCV::F24_F)
20454 .Cases("{f25}", "{fs9}", RISCV::F25_F)
20455 .Cases("{f26}", "{fs10}", RISCV::F26_F)
20456 .Cases("{f27}", "{fs11}", RISCV::F27_F)
20457 .Cases("{f28}", "{ft8}", RISCV::F28_F)
20458 .Cases("{f29}", "{ft9}", RISCV::F29_F)
20459 .Cases("{f30}", "{ft10}", RISCV::F30_F)
20460 .Cases("{f31}", "{ft11}", RISCV::F31_F)
20461 .Default(RISCV::NoRegister);
20462 if (FReg != RISCV::NoRegister) {
20463 assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg");
20464 if (Subtarget.hasStdExtD() && (VT == MVT::f64 || VT == MVT::Other)) {
20465 unsigned RegNo = FReg - RISCV::F0_F;
20466 unsigned DReg = RISCV::F0_D + RegNo;
20467 return std::make_pair(DReg, &RISCV::FPR64RegClass);
20468 }
20469 if (VT == MVT::f32 || VT == MVT::Other)
20470 return std::make_pair(FReg, &RISCV::FPR32RegClass);
20471 if (Subtarget.hasStdExtZfhmin() && VT == MVT::f16) {
20472 unsigned RegNo = FReg - RISCV::F0_F;
20473 unsigned HReg = RISCV::F0_H + RegNo;
20474 return std::make_pair(HReg, &RISCV::FPR16RegClass);
20475 }
20476 }
20477 }
20478
20479 if (Subtarget.hasVInstructions()) {
20480 Register VReg = StringSwitch<Register>(Constraint.lower())
20481 .Case("{v0}", RISCV::V0)
20482 .Case("{v1}", RISCV::V1)
20483 .Case("{v2}", RISCV::V2)
20484 .Case("{v3}", RISCV::V3)
20485 .Case("{v4}", RISCV::V4)
20486 .Case("{v5}", RISCV::V5)
20487 .Case("{v6}", RISCV::V6)
20488 .Case("{v7}", RISCV::V7)
20489 .Case("{v8}", RISCV::V8)
20490 .Case("{v9}", RISCV::V9)
20491 .Case("{v10}", RISCV::V10)
20492 .Case("{v11}", RISCV::V11)
20493 .Case("{v12}", RISCV::V12)
20494 .Case("{v13}", RISCV::V13)
20495 .Case("{v14}", RISCV::V14)
20496 .Case("{v15}", RISCV::V15)
20497 .Case("{v16}", RISCV::V16)
20498 .Case("{v17}", RISCV::V17)
20499 .Case("{v18}", RISCV::V18)
20500 .Case("{v19}", RISCV::V19)
20501 .Case("{v20}", RISCV::V20)
20502 .Case("{v21}", RISCV::V21)
20503 .Case("{v22}", RISCV::V22)
20504 .Case("{v23}", RISCV::V23)
20505 .Case("{v24}", RISCV::V24)
20506 .Case("{v25}", RISCV::V25)
20507 .Case("{v26}", RISCV::V26)
20508 .Case("{v27}", RISCV::V27)
20509 .Case("{v28}", RISCV::V28)
20510 .Case("{v29}", RISCV::V29)
20511 .Case("{v30}", RISCV::V30)
20512 .Case("{v31}", RISCV::V31)
20513 .Default(RISCV::NoRegister);
20514 if (VReg != RISCV::NoRegister) {
20515 if (TRI->isTypeLegalForClass(RISCV::VMRegClass, VT.SimpleTy))
20516 return std::make_pair(VReg, &RISCV::VMRegClass);
20517 if (TRI->isTypeLegalForClass(RISCV::VRRegClass, VT.SimpleTy))
20518 return std::make_pair(VReg, &RISCV::VRRegClass);
20519 for (const auto *RC :
20520 {&RISCV::VRM2RegClass, &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
20521 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy)) {
20522 VReg = TRI->getMatchingSuperReg(VReg, RISCV::sub_vrm1_0, RC);
20523 return std::make_pair(VReg, RC);
20524 }
20525 }
20526 }
20527 }
20528
20529 std::pair<Register, const TargetRegisterClass *> Res =
20531
20532 // If we picked one of the Zfinx register classes, remap it to the GPR class.
20533 // FIXME: When Zfinx is supported in CodeGen this will need to take the
20534 // Subtarget into account.
20535 if (Res.second == &RISCV::GPRF16RegClass ||
20536 Res.second == &RISCV::GPRF32RegClass ||
20537 Res.second == &RISCV::GPRPairRegClass)
20538 return std::make_pair(Res.first, &RISCV::GPRRegClass);
20539
20540 return Res;
20541}
20542
20545 // Currently only support length 1 constraints.
20546 if (ConstraintCode.size() == 1) {
20547 switch (ConstraintCode[0]) {
20548 case 'A':
20550 default:
20551 break;
20552 }
20553 }
20554
20555 return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
20556}
20557
20559 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
20560 SelectionDAG &DAG) const {
20561 // Currently only support length 1 constraints.
20562 if (Constraint.size() == 1) {
20563 switch (Constraint[0]) {
20564 case 'I':
20565 // Validate & create a 12-bit signed immediate operand.
20566 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
20567 uint64_t CVal = C->getSExtValue();
20568 if (isInt<12>(CVal))
20569 Ops.push_back(DAG.getSignedConstant(
20570 CVal, SDLoc(Op), Subtarget.getXLenVT(), /*isTarget=*/true));
20571 }
20572 return;
20573 case 'J':
20574 // Validate & create an integer zero operand.
20575 if (isNullConstant(Op))
20576 Ops.push_back(
20577 DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT()));
20578 return;
20579 case 'K':
20580 // Validate & create a 5-bit unsigned immediate operand.
20581 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
20582 uint64_t CVal = C->getZExtValue();
20583 if (isUInt<5>(CVal))
20584 Ops.push_back(
20585 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
20586 }
20587 return;
20588 case 'S':
20590 return;
20591 default:
20592 break;
20593 }
20594 }
20595 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
20596}
20597
20599 Instruction *Inst,
20600 AtomicOrdering Ord) const {
20601 if (Subtarget.hasStdExtZtso()) {
20603 return Builder.CreateFence(Ord);
20604 return nullptr;
20605 }
20606
20608 return Builder.CreateFence(Ord);
20609 if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
20610 return Builder.CreateFence(AtomicOrdering::Release);
20611 return nullptr;
20612}
20613
20615 Instruction *Inst,
20616 AtomicOrdering Ord) const {
20617 if (Subtarget.hasStdExtZtso()) {
20619 return Builder.CreateFence(Ord);
20620 return nullptr;
20621 }
20622
20623 if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
20624 return Builder.CreateFence(AtomicOrdering::Acquire);
20625 if (Subtarget.enableTrailingSeqCstFence() && isa<StoreInst>(Inst) &&
20627 return Builder.CreateFence(AtomicOrdering::SequentiallyConsistent);
20628 return nullptr;
20629}
20630
20633 // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating
20634 // point operations can't be used in an lr/sc sequence without breaking the
20635 // forward-progress guarantee.
20636 if (AI->isFloatingPointOperation() ||
20642
20643 // Don't expand forced atomics, we want to have __sync libcalls instead.
20644 if (Subtarget.hasForcedAtomics())
20646
20647 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
20648 if (AI->getOperation() == AtomicRMWInst::Nand) {
20649 if (Subtarget.hasStdExtZacas() &&
20650 (Size >= 32 || Subtarget.hasStdExtZabha()))
20652 if (Size < 32)
20654 }
20655
20656 if (Size < 32 && !Subtarget.hasStdExtZabha())
20658
20660}
20661
20662static Intrinsic::ID
20664 if (XLen == 32) {
20665 switch (BinOp) {
20666 default:
20667 llvm_unreachable("Unexpected AtomicRMW BinOp");
20669 return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
20670 case AtomicRMWInst::Add:
20671 return Intrinsic::riscv_masked_atomicrmw_add_i32;
20672 case AtomicRMWInst::Sub:
20673 return Intrinsic::riscv_masked_atomicrmw_sub_i32;
20675 return Intrinsic::riscv_masked_atomicrmw_nand_i32;
20676 case AtomicRMWInst::Max:
20677 return Intrinsic::riscv_masked_atomicrmw_max_i32;
20678 case AtomicRMWInst::Min:
20679 return Intrinsic::riscv_masked_atomicrmw_min_i32;
20681 return Intrinsic::riscv_masked_atomicrmw_umax_i32;
20683 return Intrinsic::riscv_masked_atomicrmw_umin_i32;
20684 }
20685 }
20686
20687 if (XLen == 64) {
20688 switch (BinOp) {
20689 default:
20690 llvm_unreachable("Unexpected AtomicRMW BinOp");
20692 return Intrinsic::riscv_masked_atomicrmw_xchg_i64;
20693 case AtomicRMWInst::Add:
20694 return Intrinsic::riscv_masked_atomicrmw_add_i64;
20695 case AtomicRMWInst::Sub:
20696 return Intrinsic::riscv_masked_atomicrmw_sub_i64;
20698 return Intrinsic::riscv_masked_atomicrmw_nand_i64;
20699 case AtomicRMWInst::Max:
20700 return Intrinsic::riscv_masked_atomicrmw_max_i64;
20701 case AtomicRMWInst::Min:
20702 return Intrinsic::riscv_masked_atomicrmw_min_i64;
20704 return Intrinsic::riscv_masked_atomicrmw_umax_i64;
20706 return Intrinsic::riscv_masked_atomicrmw_umin_i64;
20707 }
20708 }
20709
20710 llvm_unreachable("Unexpected XLen\n");
20711}
20712
20714 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
20715 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
20716 // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
20717 // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
20718 // mask, as this produces better code than the LR/SC loop emitted by
20719 // int_riscv_masked_atomicrmw_xchg.
20720 if (AI->getOperation() == AtomicRMWInst::Xchg &&
20723 if (CVal->isZero())
20724 return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
20725 Builder.CreateNot(Mask, "Inv_Mask"),
20726 AI->getAlign(), Ord);
20727 if (CVal->isMinusOne())
20728 return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
20729 AI->getAlign(), Ord);
20730 }
20731
20732 unsigned XLen = Subtarget.getXLen();
20733 Value *Ordering =
20734 Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
20735 Type *Tys[] = {AlignedAddr->getType()};
20736 Function *LrwOpScwLoop = Intrinsic::getDeclaration(
20737 AI->getModule(),
20739
20740 if (XLen == 64) {
20741 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
20742 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
20743 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
20744 }
20745
20746 Value *Result;
20747
20748 // Must pass the shift amount needed to sign extend the loaded value prior
20749 // to performing a signed comparison for min/max. ShiftAmt is the number of
20750 // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
20751 // is the number of bits to left+right shift the value in order to
20752 // sign-extend.
20753 if (AI->getOperation() == AtomicRMWInst::Min ||
20755 const DataLayout &DL = AI->getDataLayout();
20756 unsigned ValWidth =
20757 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
20758 Value *SextShamt =
20759 Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
20760 Result = Builder.CreateCall(LrwOpScwLoop,
20761 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
20762 } else {
20763 Result =
20764 Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
20765 }
20766
20767 if (XLen == 64)
20768 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
20769 return Result;
20770}
20771
20774 AtomicCmpXchgInst *CI) const {
20775 // Don't expand forced atomics, we want to have __sync libcalls instead.
20776 if (Subtarget.hasForcedAtomics())
20778
20780 if (!(Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas()) &&
20781 (Size == 8 || Size == 16))
20784}
20785
20787 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
20788 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
20789 unsigned XLen = Subtarget.getXLen();
20790 Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
20791 Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32;
20792 if (XLen == 64) {
20793 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
20794 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
20795 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
20796 CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64;
20797 }
20798 Type *Tys[] = {AlignedAddr->getType()};
20799 Function *MaskedCmpXchg =
20800 Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
20801 Value *Result = Builder.CreateCall(
20802 MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
20803 if (XLen == 64)
20804 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
20805 return Result;
20806}
20807
20809 EVT DataVT) const {
20810 // We have indexed loads for all supported EEW types. Indices are always
20811 // zero extended.
20812 return Extend.getOpcode() == ISD::ZERO_EXTEND &&
20813 isTypeLegal(Extend.getValueType()) &&
20814 isTypeLegal(Extend.getOperand(0).getValueType()) &&
20815 Extend.getOperand(0).getValueType().getVectorElementType() != MVT::i1;
20816}
20817
20819 EVT VT) const {
20820 if (!isOperationLegalOrCustom(Op, VT) || !FPVT.isSimple())
20821 return false;
20822
20823 switch (FPVT.getSimpleVT().SimpleTy) {
20824 case MVT::f16:
20825 return Subtarget.hasStdExtZfhmin();
20826 case MVT::f32:
20827 return Subtarget.hasStdExtF();
20828 case MVT::f64:
20829 return Subtarget.hasStdExtD();
20830 default:
20831 return false;
20832 }
20833}
20834
20836 // If we are using the small code model, we can reduce size of jump table
20837 // entry to 4 bytes.
20838 if (Subtarget.is64Bit() && !isPositionIndependent() &&
20841 }
20843}
20844
20846 const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB,
20847 unsigned uid, MCContext &Ctx) const {
20848 assert(Subtarget.is64Bit() && !isPositionIndependent() &&
20850 return MCSymbolRefExpr::create(MBB->getSymbol(), Ctx);
20851}
20852
20854 // We define vscale to be VLEN/RVVBitsPerBlock. VLEN is always a power
20855 // of two >= 64, and RVVBitsPerBlock is 64. Thus, vscale must be
20856 // a power of two as well.
20857 // FIXME: This doesn't work for zve32, but that's already broken
20858 // elsewhere for the same reason.
20859 assert(Subtarget.getRealMinVLen() >= 64 && "zve32* unsupported");
20860 static_assert(RISCV::RVVBitsPerBlock == 64,
20861 "RVVBitsPerBlock changed, audit needed");
20862 return true;
20863}
20864
20866 SDValue &Offset,
20868 SelectionDAG &DAG) const {
20869 // Target does not support indexed loads.
20870 if (!Subtarget.hasVendorXTHeadMemIdx())
20871 return false;
20872
20873 if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB)
20874 return false;
20875
20876 Base = Op->getOperand(0);
20877 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) {
20878 int64_t RHSC = RHS->getSExtValue();
20879 if (Op->getOpcode() == ISD::SUB)
20880 RHSC = -(uint64_t)RHSC;
20881
20882 // The constants that can be encoded in the THeadMemIdx instructions
20883 // are of the form (sign_extend(imm5) << imm2).
20884 bool isLegalIndexedOffset = false;
20885 for (unsigned i = 0; i < 4; i++)
20886 if (isInt<5>(RHSC >> i) && ((RHSC % (1LL << i)) == 0)) {
20887 isLegalIndexedOffset = true;
20888 break;
20889 }
20890
20891 if (!isLegalIndexedOffset)
20892 return false;
20893
20894 Offset = Op->getOperand(1);
20895 return true;
20896 }
20897
20898 return false;
20899}
20900
20902 SDValue &Offset,
20904 SelectionDAG &DAG) const {
20905 EVT VT;
20906 SDValue Ptr;
20907 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
20908 VT = LD->getMemoryVT();
20909 Ptr = LD->getBasePtr();
20910 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
20911 VT = ST->getMemoryVT();
20912 Ptr = ST->getBasePtr();
20913 } else
20914 return false;
20915
20916 if (!getIndexedAddressParts(Ptr.getNode(), Base, Offset, AM, DAG))
20917 return false;
20918
20919 AM = ISD::PRE_INC;
20920 return true;
20921}
20922
20924 SDValue &Base,
20925 SDValue &Offset,
20927 SelectionDAG &DAG) const {
20928 if (Subtarget.hasVendorXCVmem() && !Subtarget.is64Bit()) {
20929 if (Op->getOpcode() != ISD::ADD)
20930 return false;
20931
20933 Base = LS->getBasePtr();
20934 else
20935 return false;
20936
20937 if (Base == Op->getOperand(0))
20938 Offset = Op->getOperand(1);
20939 else if (Base == Op->getOperand(1))
20940 Offset = Op->getOperand(0);
20941 else
20942 return false;
20943
20944 AM = ISD::POST_INC;
20945 return true;
20946 }
20947
20948 EVT VT;
20949 SDValue Ptr;
20950 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
20951 VT = LD->getMemoryVT();
20952 Ptr = LD->getBasePtr();
20953 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
20954 VT = ST->getMemoryVT();
20955 Ptr = ST->getBasePtr();
20956 } else
20957 return false;
20958
20959 if (!getIndexedAddressParts(Op, Base, Offset, AM, DAG))
20960 return false;
20961 // Post-indexing updates the base, so it's not a valid transform
20962 // if that's not the same as the load's pointer.
20963 if (Ptr != Base)
20964 return false;
20965
20966 AM = ISD::POST_INC;
20967 return true;
20968}
20969
20971 EVT VT) const {
20972 EVT SVT = VT.getScalarType();
20973
20974 if (!SVT.isSimple())
20975 return false;
20976
20977 switch (SVT.getSimpleVT().SimpleTy) {
20978 case MVT::f16:
20979 return VT.isVector() ? Subtarget.hasVInstructionsF16()
20980 : Subtarget.hasStdExtZfhOrZhinx();
20981 case MVT::f32:
20982 return Subtarget.hasStdExtFOrZfinx();
20983 case MVT::f64:
20984 return Subtarget.hasStdExtDOrZdinx();
20985 default:
20986 break;
20987 }
20988
20989 return false;
20990}
20991
20993 // Zacas will use amocas.w which does not require extension.
20994 return Subtarget.hasStdExtZacas() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND;
20995}
20996
20998 const Constant *PersonalityFn) const {
20999 return RISCV::X10;
21000}
21001
21003 const Constant *PersonalityFn) const {
21004 return RISCV::X11;
21005}
21006
21008 // Return false to suppress the unnecessary extensions if the LibCall
21009 // arguments or return value is a float narrower than XLEN on a soft FP ABI.
21010 if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
21011 Type.getSizeInBits() < Subtarget.getXLen()))
21012 return false;
21013
21014 return true;
21015}
21016
21018 if (Subtarget.is64Bit() && Type == MVT::i32)
21019 return true;
21020
21021 return IsSigned;
21022}
21023
// NOTE(review): signature head lost in extraction; from the body this is the
// decomposeMulByConstant-style hook: decide whether a MUL by constant C is
// worth decomposing into shifts/adds instead of using a multiply instruction.
 21025 SDValue C) const {
 21026 // Check integral scalar types.
 21027 if (!VT.isScalarInteger())
 21028 return false;
 21029
 21030 // Omit the optimization if the subtarget has Zmmul (hardware multiply,
 21031 // implied by the M extension) and the data size exceeds XLen.
 21032 const bool HasZmmul = Subtarget.hasStdExtZmmul();
 21033 if (HasZmmul && VT.getSizeInBits() > Subtarget.getXLen())
 21034 return false;
 21035
 21036 auto *ConstNode = cast<ConstantSDNode>(C);
 21037 const APInt &Imm = ConstNode->getAPIntValue();
 21038
 21039 // Break the MUL to a SLLI and an ADD/SUB.
// Covers Imm = 2^k +/- 1 and the negated forms -(2^k) +/- 1.
 21040 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
 21041 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
 21042 return true;
 21043
 21044 // Optimize the MUL to (SH*ADD x, (SLLI x, bits)) if Imm is not simm12.
// Zba's sh1add/sh2add/sh3add cover Imm = 2^k + {2,4,8}; only profitable when
// the constant does not already fit an ADDI immediate.
 21045 if (Subtarget.hasStdExtZba() && !Imm.isSignedIntN(12) &&
 21046 ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
 21047 (Imm - 8).isPowerOf2()))
 21048 return true;
 21049
 21050 // Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs
 21051 // a pair of LUI/ADDI.
// Strip trailing zeros first (handled by a second SLLI), then re-test the
// 2^k +/- 1 patterns on the odd part. Restricted to single-use constants so
// the materialized constant is not needed elsewhere anyway.
 21052 if (!Imm.isSignedIntN(12) && Imm.countr_zero() < 12 &&
 21053 ConstNode->hasOneUse()) {
 21054 APInt ImmS = Imm.ashr(Imm.countr_zero());
 21055 if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() ||
 21056 (1 - ImmS).isPowerOf2())
 21057 return true;
 21058 }
 21059
 21060 return false;
 21061}
21062
// NOTE(review): signature head lost in extraction. From the body: decides
// whether folding (add x, c1) * c2 into a single mul-add-style expansion is
// profitable, i.e. whether (x*c2 + c1*c2) would not be worse than the
// original add+mul.
 21064 SDValue ConstNode) const {
 21065 // Let the DAGCombiner decide for vectors.
 21066 EVT VT = AddNode.getValueType();
 21067 if (VT.isVector())
 21068 return true;
 21069
 21070 // Let the DAGCombiner decide for larger types.
 21071 if (VT.getScalarSizeInBits() > Subtarget.getXLen())
 21072 return true;
 21073
 21074 // It is worse if c1 is simm12 while c1*c2 is not.
// c1 in simm12 can feed an ADDI directly; if c1*c2 falls outside simm12 the
// transformed form would need an extra constant materialization.
 21075 ConstantSDNode *C1Node = cast<ConstantSDNode>(AddNode.getOperand(1));
 21076 ConstantSDNode *C2Node = cast<ConstantSDNode>(ConstNode);
 21077 const APInt &C1 = C1Node->getAPIntValue();
 21078 const APInt &C2 = C2Node->getAPIntValue();
 21079 if (C1.isSignedIntN(12) && !(C1 * C2).isSignedIntN(12))
 21080 return false;
 21081
 21082 // Default to true and let the DAGCombiner decide.
 21083 return true;
 21084}
21085
// NOTE(review): signature head lost in extraction; parameter list matches the
// allowsMisalignedMemoryAccesses target hook. Reports whether a misaligned
// access of type VT is legal, and via *Fast whether it is also fast.
 21087 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
 21088 unsigned *Fast) const {
// Scalars: both legality and speed are governed by the subtarget's
// unaligned-scalar-memory feature.
 21089 if (!VT.isVector()) {
 21090 if (Fast)
 21091 *Fast = Subtarget.enableUnalignedScalarMem();
 21092 return Subtarget.enableUnalignedScalarMem();
 21093 }
 21094
 21095 // All vector implementations must support element alignment
 21096 EVT ElemVT = VT.getVectorElementType();
 21097 if (Alignment >= ElemVT.getStoreSize()) {
 21098 if (Fast)
 21099 *Fast = 1;
 21100 return true;
 21101 }
 21102
 21103 // Note: We lower an unmasked unaligned vector access to an equally sized
 21104 // e8 element type access. Given this, we effectively support all unmasked
 21105 // misaligned accesses. TODO: Work through the codegen implications of
 21106 // allowing such accesses to be formed, and considered fast.
 21107 if (Fast)
 21108 *Fast = Subtarget.enableUnalignedVectorMem();
 21109 return Subtarget.enableUnalignedVectorMem();
 21110}
21111
21112
// NOTE(review): signature head lost in extraction; from the body this is the
// getOptimalMemOpType-style hook: pick a (fixed) vector type for lowering
// memcpy/memset-like operations Op, or MVT::Other for the scalar default.
 21114 const AttributeList &FuncAttributes) const {
 21115 if (!Subtarget.hasVInstructions())
 21116 return MVT::Other;
 21117
// Respect noimplicitfloat: no implicit vector use in such functions.
 21118 if (FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat))
 21119 return MVT::Other;
 21120
 21121 // We use LMUL1 memory operations here for a non-obvious reason. Our caller
 21122 // has an expansion threshold, and we want the number of hardware memory
 21123 // operations to correspond roughly to that threshold. LMUL>1 operations
 21124 // are typically expanded linearly internally, and thus correspond to more
 21125 // than one actual memory operation. Note that store merging and load
 21126 // combining will typically form larger LMUL operations from the LMUL1
 21127 // operations emitted here, and that's okay because combining isn't
 21128 // introducing new memory operations; it's just merging existing ones.
 21129 const unsigned MinVLenInBytes = Subtarget.getRealMinVLen()/8;
 21130 if (Op.size() < MinVLenInBytes)
 21131 // TODO: Figure out short memops. For the moment, do the default thing
 21132 // which ends up using scalar sequences.
 21133 return MVT::Other;
 21134
 21135 // If the minimum VLEN is less than RISCV::RVVBitsPerBlock we don't support
 21136 // fixed vectors.
 21137 if (MinVLenInBytes <= RISCV::RVVBitsPerBlock / 8)
 21138 return MVT::Other;
 21139
 21140 // Prefer i8 for non-zero memset as it allows us to avoid materializing
 21141 // a large scalar constant and instead use vmv.v.x/i to do the
 21142 // broadcast. For everything else, prefer ELenVT to minimize VL and thus
 21143 // maximize the chance we can encode the size in the vsetvli.
 21144 MVT ELenVT = MVT::getIntegerVT(Subtarget.getELen());
 21145 MVT PreferredVT = (Op.isMemset() && !Op.isZeroMemset()) ? MVT::i8 : ELenVT;
 21146
 21147 // Do we have sufficient alignment for our preferred VT? If not, revert
 21148 // to largest size allowed by our alignment criteria.
// Element-aligned accesses are always allowed; larger element types need
// either matching alignment or the unaligned-vector-memory feature.
 21149 if (PreferredVT != MVT::i8 && !Subtarget.enableUnalignedVectorMem()) {
 21150 Align RequiredAlign(PreferredVT.getStoreSize());
 21151 if (Op.isFixedDstAlign())
 21152 RequiredAlign = std::min(RequiredAlign, Op.getDstAlign());
 21153 if (Op.isMemcpy())
 21154 RequiredAlign = std::min(RequiredAlign, Op.getSrcAlign());
 21155 PreferredVT = MVT::getIntegerVT(RequiredAlign.value() * 8);
 21156 }
 21157 return MVT::getVectorVT(PreferredVT, MinVLenInBytes/PreferredVT.getStoreSize());
 21158}
21159
// NOTE(review): the method name line was lost in extraction; the parameter
// list matches the splitValueIntoRegisterParts target hook. Handles the
// RISC-V-specific ABI cases: [b]f16 passed in an f32 FPR (NaN-boxed),
// scalable-vector values copied into a wider register-sized vector, and
// RISC-V vector tuples. Returns false to fall back to common code.
 21161 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
 21162 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
 21163 bool IsABIRegCopy = CC.has_value();
 21164 EVT ValueVT = Val.getValueType();
 21165 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
 21166 PartVT == MVT::f32) {
 21167 // Cast the [b]f16 to i16, extend to i32, pad with ones to make a float
 21168 // nan, and cast to f32.
 21169 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
 21170 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
 21171 Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
 21172 DAG.getConstant(0xFFFF0000, DL, MVT::i32));
 21173 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
 21174 Parts[0] = Val;
 21175 return true;
 21176 }
 21177
 21178 if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
 21179 LLVMContext &Context = *DAG.getContext();
 21180 EVT ValueEltVT = ValueVT.getVectorElementType();
 21181 EVT PartEltVT = PartVT.getVectorElementType();
 21182 unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
 21183 unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
 21184 if (PartVTBitSize % ValueVTBitSize == 0) {
 21185 assert(PartVTBitSize >= ValueVTBitSize);
 21186 // If the element types are different, bitcast to the same element type of
 21187 // PartVT first.
 21188 // Give an example here, we want copy a <vscale x 1 x i8> value to
 21189 // <vscale x 4 x i16>.
 21190 // We need to convert <vscale x 1 x i8> to <vscale x 8 x i8> by insert
 21191 // subvector, then we can bitcast to <vscale x 4 x i16>.
 21192 if (ValueEltVT != PartEltVT) {
 21193 if (PartVTBitSize > ValueVTBitSize) {
 21194 unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
 21195 assert(Count != 0 && "The number of element should not be zero.");
 21196 EVT SameEltTypeVT =
 21197 EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
 21198 Val = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, SameEltTypeVT,
 21199 DAG.getUNDEF(SameEltTypeVT), Val,
 21200 DAG.getVectorIdxConstant(0, DL));
 21201 }
 21202 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
 21203 } else {
// Same element type: widen by inserting at index 0 of an undef PartVT.
 21204 Val =
 21205 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, PartVT, DAG.getUNDEF(PartVT),
 21206 Val, DAG.getVectorIdxConstant(0, DL));
 21207 }
 21208 Parts[0] = Val;
 21209 return true;
 21210 }
 21211 }
 21212
 21213 if (ValueVT.isRISCVVectorTuple() && PartVT.isRISCVVectorTuple()) {
 21214 unsigned ValNF = ValueVT.getRISCVVectorTupleNumFields();
// NOTE(review): the initializer lines of ValLMUL/PartLMUL were lost in
// extraction (embedded numbering jumps 21215->21217 and 21218->21220);
// restore them from upstream before editing this region.
 21215 [[maybe_unused]] unsigned ValLMUL =
 21217 unsigned PartNF = PartVT.getRISCVVectorTupleNumFields();
 21218 [[maybe_unused]] unsigned PartLMUL =
 21220 assert(ValNF == PartNF && ValLMUL == PartLMUL &&
 21221 "RISC-V vector tuple type only accepts same register class type "
 21222 "TUPLE_INSERT");
 21223
 21224 Val = DAG.getNode(RISCVISD::TUPLE_INSERT, DL, PartVT, DAG.getUNDEF(PartVT),
 21225 Val, DAG.getVectorIdxConstant(0, DL));
 21226 Parts[0] = Val;
 21227 return true;
 21228 }
 21229 return false;
 21230}
21231
// NOTE(review): the method name line was lost in extraction; the parameter
// list matches the joinRegisterPartsIntoValue target hook -- the inverse of
// the split hook above: recover a value of ValueVT from its register parts.
// Returns a null SDValue to fall back to common lowering.
 21233 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
 21234 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
 21235 bool IsABIRegCopy = CC.has_value();
// Undo the f32 NaN-boxing of an ABI-passed [b]f16: take the low 16 bits.
 21236 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
 21237 PartVT == MVT::f32) {
 21238 SDValue Val = Parts[0];
 21239
 21240 // Cast the f32 to i32, truncate to i16, and cast back to [b]f16.
 21241 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
 21242 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
 21243 Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
 21244 return Val;
 21245 }
 21246
 21247 if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
 21248 LLVMContext &Context = *DAG.getContext();
 21249 SDValue Val = Parts[0];
 21250 EVT ValueEltVT = ValueVT.getVectorElementType();
 21251 EVT PartEltVT = PartVT.getVectorElementType();
 21252 unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
 21253 unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
 21254 if (PartVTBitSize % ValueVTBitSize == 0) {
 21255 assert(PartVTBitSize >= ValueVTBitSize);
 21256 EVT SameEltTypeVT = ValueVT;
 21257 // If the element types are different, convert it to the same element type
 21258 // of PartVT.
 21259 // Give an example here, we want copy a <vscale x 1 x i8> value from
 21260 // <vscale x 4 x i16>.
 21261 // We need to convert <vscale x 4 x i16> to <vscale x 8 x i8> first,
 21262 // then we can extract <vscale x 1 x i8>.
 21263 if (ValueEltVT != PartEltVT) {
 21264 unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
 21265 assert(Count != 0 && "The number of element should not be zero.");
 21266 SameEltTypeVT =
 21267 EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
 21268 Val = DAG.getNode(ISD::BITCAST, DL, SameEltTypeVT, Val);
 21269 }
// The value lives in the low elements of the (wider) part register.
 21270 Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
 21271 DAG.getVectorIdxConstant(0, DL));
 21272 return Val;
 21273 }
 21274 }
 21275 return SDValue();
 21276}
21277
// NOTE(review): signature line lost in extraction; from the body this is
// isIntDivCheap(VT, Attr): treat scalar division as "cheap" only under
// minsize, where div is smaller than the multiply-based expansion.
 21279 // When aggressively optimizing for code size, we prefer to use a div
 21280 // instruction, as it is usually smaller than the alternative sequence.
 21281 // TODO: Add vector division?
 21282 bool OptSize = Attr.hasFnAttr(Attribute::MinSize);
 21283 return OptSize && !VT.isVector();
 21284}
21285
// NOTE(review): signature line lost in extraction; from the body this is a
// preferScalarizeSplat-style predicate: prefer scalarizing a splatted node
// unless it is a zext/sext, which should stay vector to form widening ops.
 21287 // Scalarize zero_ext and sign_ext might stop match to widening instruction in
 21288 // some situation.
 21289 unsigned Opc = N->getOpcode();
 21290 if (Opc == ISD::ZERO_EXTEND || Opc == ISD::SIGN_EXTEND)
 21291 return false;
 21292 return true;
 21293}
21294
21295static Value *useTpOffset(IRBuilderBase &IRB, unsigned Offset) {
21296 Module *M = IRB.GetInsertBlock()->getParent()->getParent();
21297 Function *ThreadPointerFunc =
21298 Intrinsic::getDeclaration(M, Intrinsic::thread_pointer);
21299 return IRB.CreateConstGEP1_32(IRB.getInt8Ty(),
21300 IRB.CreateCall(ThreadPointerFunc), Offset);
21301}
21302
// NOTE(review): the signature line of this stack-guard hook was lost in
// extraction, as was the final default-return line (numbering jumps
// 21314 -> 21316); restore both from upstream before editing this region.
// Returns the platform's fixed tp-relative stack-cookie slot when one exists.
 21304 // Fuchsia provides a fixed TLS slot for the stack cookie.
 21305 // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
 21306 if (Subtarget.isTargetFuchsia())
 21307 return useTpOffset(IRB, -0x10);
 21308
 21309 // Android provides a fixed TLS slot for the stack cookie. See the definition
 21310 // of TLS_SLOT_STACK_GUARD in
 21311 // https://android.googlesource.com/platform/bionic/+/main/libc/platform/bionic/tls_defines.h
 21312 if (Subtarget.isTargetAndroid())
 21313 return useTpOffset(IRB, -0x18);
 21314
 21316 }
21317
// NOTE(review): signature head lost in extraction; from callers below this is
// isLegalInterleavedAccessType: whether a Factor-way segment access on VTy
// with the given alignment/address space can be lowered to vlseg/vsseg.
 21319 VectorType *VTy, unsigned Factor, Align Alignment, unsigned AddrSpace,
 21320 const DataLayout &DL) const {
 21321 EVT VT = getValueType(DL, VTy);
 21322 // Don't lower vlseg/vsseg for vector types that can't be split.
 21323 if (!isTypeLegal(VT))
 21324 return false;
 21325
// NOTE(review): the opening of this condition was lost in extraction
// (numbering jumps 21325 -> 21327); restore it from upstream.
 21327 !allowsMemoryAccessForAlignment(VTy->getContext(), DL, VT, AddrSpace,
 21328 Alignment))
 21329 return false;
 21330
 21331 MVT ContainerVT = VT.getSimpleVT();
 21332
 21333 if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
 21334 if (!Subtarget.useRVVForFixedLengthVectors())
 21335 return false;
 21336 // Sometimes the interleaved access pass picks up splats as interleaves of
 21337 // one element. Don't lower these.
 21338 if (FVTy->getNumElements() < 2)
 21339 return false;
 21340
// NOTE(review): a line was lost here (numbering jumps 21340 -> 21342);
// presumably the fixed-length-to-container conversion of ContainerVT.
 21342 } else {
 21343 // The intrinsics for scalable vectors are not overloaded on pointer type
 21344 // and can only handle the default address space.
 21345 if (AddrSpace)
 21346 return false;
 21347 }
 21348
 21349 // Need to make sure that EMUL * NFIELDS ≤ 8
 21350 auto [LMUL, Fractional] = RISCVVType::decodeVLMUL(getLMUL(ContainerVT));
// Fractional LMUL (< 1) always satisfies the constraint for Factor <= 8.
 21351 if (Fractional)
 21352 return true;
 21353 return Factor * LMUL <= 8;
 21354}
21355
// NOTE(review): signature head lost in extraction. From the body: a legality
// predicate for RVV memory accesses of DataType with the given alignment --
// presumably isLegalStridedLoadStore or a sibling hook; confirm the name
// against the class declaration.
 21357 Align Alignment) const {
 21358 if (!Subtarget.hasVInstructions())
 21359 return false;
 21360
 21361 // Only support fixed vectors if we know the minimum vector size.
 21362 if (DataType.isFixedLengthVector() && !Subtarget.useRVVForFixedLengthVectors())
 21363 return false;
 21364
 21365 EVT ScalarType = DataType.getScalarType();
 21366 if (!isLegalElementTypeForRVV(ScalarType))
 21367 return false;
 21368
// Sub-element alignment is only acceptable when the subtarget tolerates
// unaligned vector memory accesses.
 21369 if (!Subtarget.enableUnalignedVectorMem() &&
 21370 Alignment < ScalarType.getStoreSize())
 21371 return false;
 21372
 21373 return true;
 21374}
21375
21377 Intrinsic::riscv_seg2_load, Intrinsic::riscv_seg3_load,
21378 Intrinsic::riscv_seg4_load, Intrinsic::riscv_seg5_load,
21379 Intrinsic::riscv_seg6_load, Intrinsic::riscv_seg7_load,
21380 Intrinsic::riscv_seg8_load};
21381
21382/// Lower an interleaved load into a vlsegN intrinsic.
21383///
21384/// E.g. Lower an interleaved load (Factor = 2):
21385/// %wide.vec = load <8 x i32>, <8 x i32>* %ptr
21386/// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6> ; Extract even elements
21387/// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7> ; Extract odd elements
21388///
21389/// Into:
21390/// %ld2 = { <4 x i32>, <4 x i32> } call llvm.riscv.seg2.load.v4i32.p0.i64(
21391/// %ptr, i64 4)
21392/// %vec0 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 0
21393/// %vec1 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 1
// NOTE(review): the signature head (through the Shuffles parameter) was lost
// in extraction; the doxygen comment above documents the transform. Replaces
// the wide load + extracting shufflevectors with one riscv.segN.load call.
 21396 ArrayRef<unsigned> Indices, unsigned Factor) const {
 21397 IRBuilder<> Builder(LI);
 21398
 21399 auto *VTy = cast<FixedVectorType>(Shuffles[0]->getType());
// NOTE(review): a line was lost here (numbering jumps 21400 -> 21402);
// presumably LI->getPointerAddressSpace() as the AddrSpace argument.
 21400 if (!isLegalInterleavedAccessType(VTy, Factor, LI->getAlign(),
 21402 LI->getDataLayout()))
 21403 return false;
 21404
// VL and the intrinsic's third type parameter are XLEN-sized integers.
 21405 auto *XLenTy = Type::getIntNTy(LI->getContext(), Subtarget.getXLen());
 21406
// NOTE(review): the getDeclaration call head was lost here (numbering jumps
// 21407 -> 21409); it selects FixedVlsegIntrIds[Factor - 2].
 21407 Function *VlsegNFunc =
 21409 {VTy, LI->getPointerOperandType(), XLenTy});
 21410
 21411 Value *VL = ConstantInt::get(XLenTy, VTy->getNumElements());
 21412
 21413 CallInst *VlsegN =
 21414 Builder.CreateCall(VlsegNFunc, {LI->getPointerOperand(), VL});
 21415
// Rewire each extracting shuffle to the matching segment of the vlseg result.
 21416 for (unsigned i = 0; i < Shuffles.size(); i++) {
 21417 Value *SubVec = Builder.CreateExtractValue(VlsegN, Indices[i]);
 21418 Shuffles[i]->replaceAllUsesWith(SubVec);
 21419 }
 21420
 21421 return true;
 21422}
21423
21425 Intrinsic::riscv_seg2_store, Intrinsic::riscv_seg3_store,
21426 Intrinsic::riscv_seg4_store, Intrinsic::riscv_seg5_store,
21427 Intrinsic::riscv_seg6_store, Intrinsic::riscv_seg7_store,
21428 Intrinsic::riscv_seg8_store};
21429
21430/// Lower an interleaved store into a vssegN intrinsic.
21431///
21432/// E.g. Lower an interleaved store (Factor = 3):
21433/// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
21434/// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
21435/// store <12 x i32> %i.vec, <12 x i32>* %ptr
21436///
21437/// Into:
21438/// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3>
21439/// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7>
21440/// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11>
21441/// call void llvm.riscv.seg3.store.v4i32.p0.i64(%sub.v0, %sub.v1, %sub.v2,
21442/// %ptr, i32 4)
21443///
21444/// Note that the new shufflevectors will be removed and we'll only generate one
21445/// vsseg3 instruction in CodeGen.
// NOTE(review): signature head lost in extraction; the doxygen comment above
// documents the transform. Replaces the interleaving shuffle + wide store
// with per-field shuffles feeding one riscv.segN.store call.
 21447 ShuffleVectorInst *SVI,
 21448 unsigned Factor) const {
 21449 IRBuilder<> Builder(SI);
 21450 auto *ShuffleVTy = cast<FixedVectorType>(SVI->getType());
 21451 // Given SVI : <n*factor x ty>, then VTy : <n x ty>
 21452 auto *VTy = FixedVectorType::get(ShuffleVTy->getElementType(),
 21453 ShuffleVTy->getNumElements() / Factor);
 21454 if (!isLegalInterleavedAccessType(VTy, Factor, SI->getAlign(),
 21455 SI->getPointerAddressSpace(),
 21456 SI->getDataLayout()))
 21457 return false;
 21458
 21459 auto *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen());
 21460
 21461 Function *VssegNFunc =
 21462 Intrinsic::getDeclaration(SI->getModule(), FixedVssegIntrIds[Factor - 2],
 21463 {VTy, SI->getPointerOperandType(), XLenTy});
 21464
 21465 auto Mask = SVI->getShuffleMask();
// NOTE(review): the declaration of Ops (a SmallVector of Value*) was lost in
// extraction here (numbering jumps 21465 -> 21467); restore it from upstream.
 21467
// Build one de-interleaved sub-vector per field, starting at Mask[i].
 21468 for (unsigned i = 0; i < Factor; i++) {
 21469 Value *Shuffle = Builder.CreateShuffleVector(
 21470 SVI->getOperand(0), SVI->getOperand(1),
 21471 createSequentialMask(Mask[i], VTy->getNumElements(), 0));
 21472 Ops.push_back(Shuffle);
 21473 }
 21474 // This VL should be OK (should be executable in one vsseg instruction,
 21475 // potentially under larger LMULs) because we checked that the fixed vector
 21476 // type fits in isLegalInterleavedAccessType
 21477 Value *VL = ConstantInt::get(XLenTy, VTy->getNumElements());
 21478 Ops.append({SI->getPointerOperand(), VL});
 21479
 21480 Builder.CreateCall(VssegNFunc, Ops);
 21481
 21482 return true;
 21483}
21484
// NOTE(review): signature head lost in extraction. Lowers a
// vector.deinterleave2 of a plain load into a segment load: a fixed-vector
// riscv.seg2.load, or a scalable riscv.vlseg2 through the vector-tuple type.
 21486 IntrinsicInst *DI, LoadInst *LI,
 21487 SmallVectorImpl<Instruction *> &DeadInsts) const {
 21488 assert(LI->isSimple());
 21489 IRBuilder<> Builder(LI);
 21490
 21491 // Only deinterleave2 supported at present.
 21492 if (DI->getIntrinsicID() != Intrinsic::vector_deinterleave2)
 21493 return false;
 21494
 21495 const unsigned Factor = 2;
// NOTE(review): the declaration of ResVTy was lost in extraction here
// (numbering jumps 21495 -> 21497); restore it from upstream.
 21497
 21498 const DataLayout &DL = LI->getDataLayout();
 21499
 21500 if (!isLegalInterleavedAccessType(ResVTy, Factor, LI->getAlign(),
 21501 LI->getPointerAddressSpace(), DL))
 21502 return false;
 21503
 21504 Value *Return;
 21505 Type *XLenTy = Type::getIntNTy(LI->getContext(), Subtarget.getXLen());
 21506
// Fixed-length path: one segN.load call yields the {even, odd} struct.
 21507 if (auto *FVTy = dyn_cast<FixedVectorType>(ResVTy)) {
 21508 Function *VlsegNFunc = Intrinsic::getDeclaration(
 21509 LI->getModule(), FixedVlsegIntrIds[Factor - 2],
 21510 {ResVTy, LI->getPointerOperandType(), XLenTy});
 21511 Value *VL = ConstantInt::get(XLenTy, FVTy->getNumElements());
 21512 Return = Builder.CreateCall(VlsegNFunc, {LI->getPointerOperand(), VL});
 21513 } else {
 21514 static const Intrinsic::ID IntrIds[] = {
 21515 Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3,
 21516 Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5,
 21517 Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7,
 21518 Intrinsic::riscv_vlseg8};
 21519
 21520 unsigned SEW = DL.getTypeSizeInBits(ResVTy->getElementType());
 21521 unsigned NumElts = ResVTy->getElementCount().getKnownMinValue();
// NOTE(review): the ScalableVectorType::get line of this tuple type was lost
// in extraction (numbering jumps 21523 -> 21525); restore it from upstream.
 21522 Type *VecTupTy = TargetExtType::get(
 21523 LI->getContext(), "riscv.vector.tuple",
 21525 NumElts * SEW / 8),
 21526 Factor);
 21527
 21528 Function *VlsegNFunc = Intrinsic::getDeclaration(
 21529 LI->getModule(), IntrIds[Factor - 2], {VecTupTy, XLenTy});
// All-ones VL requests the whole register group (VLMAX semantics).
 21530 Value *VL = Constant::getAllOnesValue(XLenTy);
 21531
 21532 Value *Vlseg = Builder.CreateCall(
 21533 VlsegNFunc, {PoisonValue::get(VecTupTy), LI->getPointerOperand(), VL,
 21534 ConstantInt::get(XLenTy, Log2_64(SEW))});
 21535
// Repack the tuple fields into the {vec, vec} struct that deinterleave2
// is defined to return.
 21536 SmallVector<Type *, 2> AggrTypes{Factor, ResVTy};
 21537 Return = PoisonValue::get(StructType::get(LI->getContext(), AggrTypes));
 21538 Function *VecExtractFunc = Intrinsic::getDeclaration(
 21539 LI->getModule(), Intrinsic::riscv_tuple_extract, {ResVTy, VecTupTy});
 21540 for (unsigned i = 0; i < Factor; ++i) {
 21541 Value *VecExtract =
 21542 Builder.CreateCall(VecExtractFunc, {Vlseg, Builder.getInt32(i)});
 21543 Return = Builder.CreateInsertValue(Return, VecExtract, i);
 21544 }
 21545 }
 21546
 21547 DI->replaceAllUsesWith(Return);
 21548
 21549 return true;
 21550}
21551
// NOTE(review): signature head lost in extraction. Mirror of the load case
// above: lowers a store of vector.interleave2 into a segment store -- a
// fixed-vector riscv.seg2.store, or a scalable riscv.vsseg2 built through the
// vector-tuple type.
 21554 SmallVectorImpl<Instruction *> &DeadInsts) const {
 21555 assert(SI->isSimple());
 21556 IRBuilder<> Builder(SI);
 21557
 21558 // Only interleave2 supported at present.
 21559 if (II->getIntrinsicID() != Intrinsic::vector_interleave2)
 21560 return false;
 21561
 21562 const unsigned Factor = 2;
 21563
 21564 VectorType *InVTy = cast<VectorType>(II->getArgOperand(0)->getType());
 21565 const DataLayout &DL = SI->getDataLayout();
 21566
 21567 if (!isLegalInterleavedAccessType(InVTy, Factor, SI->getAlign(),
 21568 SI->getPointerAddressSpace(), DL))
 21569 return false;
 21570
 21571 Type *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen());
 21572
// Fixed-length path: pass the two interleave operands straight to segN.store.
 21573 if (auto *FVTy = dyn_cast<FixedVectorType>(InVTy)) {
 21574 Function *VssegNFunc = Intrinsic::getDeclaration(
 21575 SI->getModule(), FixedVssegIntrIds[Factor - 2],
 21576 {InVTy, SI->getPointerOperandType(), XLenTy});
 21577 Value *VL = ConstantInt::get(XLenTy, FVTy->getNumElements());
 21578 Builder.CreateCall(VssegNFunc, {II->getArgOperand(0), II->getArgOperand(1),
 21579 SI->getPointerOperand(), VL});
 21580 } else {
 21581 static const Intrinsic::ID IntrIds[] = {
 21582 Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3,
 21583 Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5,
 21584 Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7,
 21585 Intrinsic::riscv_vsseg8};
 21586
 21587 unsigned SEW = DL.getTypeSizeInBits(InVTy->getElementType());
 21588 unsigned NumElts = InVTy->getElementCount().getKnownMinValue();
 21589 Type *VecTupTy = TargetExtType::get(
 21590 SI->getContext(), "riscv.vector.tuple",
 21591 ScalableVectorType::get(Type::getInt8Ty(SI->getContext()),
 21592 NumElts * SEW / 8),
 21593 Factor);
 21594
 21595 Function *VssegNFunc = Intrinsic::getDeclaration(
 21596 SI->getModule(), IntrIds[Factor - 2], {VecTupTy, XLenTy});
 21597
// All-ones VL requests the whole register group (VLMAX semantics).
 21598 Value *VL = Constant::getAllOnesValue(XLenTy);
 21599
// Pack the interleave operands into the tuple value field by field.
 21600 Function *VecInsertFunc = Intrinsic::getDeclaration(
 21601 SI->getModule(), Intrinsic::riscv_tuple_insert, {VecTupTy, InVTy});
 21602 Value *StoredVal = PoisonValue::get(VecTupTy);
 21603 for (unsigned i = 0; i < Factor; ++i)
 21604 StoredVal =
 21605 Builder.CreateCall(VecInsertFunc, {StoredVal, II->getArgOperand(i),
 21606 Builder.getInt32(i)});
 21607
 21608 Builder.CreateCall(VssegNFunc, {StoredVal, SI->getPointerOperand(), VL,
 21609 ConstantInt::get(XLenTy, Log2_64(SEW))});
 21610 }
 21611
 21612 return true;
 21613}
21614
// NOTE(review): signature head lost in extraction. Emits the KCFI_CHECK
// pseudo before an indirect call/tail-call, carrying the callee register and
// the call's CFI type id.
 21618 const TargetInstrInfo *TII) const {
 21619 assert(MBBI->isCall() && MBBI->getCFIType() &&
 21620 "Invalid call instruction for a KCFI check");
 21621 assert(is_contained({RISCV::PseudoCALLIndirect, RISCV::PseudoTAILIndirect},
 21622 MBBI->getOpcode()));
 21623
// The check pseudo names the callee register explicitly, so the operand must
// not be renamed out from under it.
 21624 MachineOperand &Target = MBBI->getOperand(0);
 21625 Target.setIsRenamable(false);
 21626
 21627 return BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(RISCV::KCFI_CHECK))
 21628 .addReg(Target.getReg())
 21629 .addImm(MBBI->getCFIType())
 21630 .getInstr();
 21631}
21632
21633#define GET_REGISTER_MATCHER
21634#include "RISCVGenAsmMatcher.inc"
21635
21638 const MachineFunction &MF) const {
21640 if (Reg == RISCV::NoRegister)
21642 if (Reg == RISCV::NoRegister)
21644 Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
21645 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
21646 if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg))
21647 report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
21648 StringRef(RegName) + "\"."));
21649 return Reg;
21650}
21651
21654 const MDNode *NontemporalInfo = I.getMetadata(LLVMContext::MD_nontemporal);
21655
21656 if (NontemporalInfo == nullptr)
21658
21659 // 1 for default value work as __RISCV_NTLH_ALL
21660 // 2 -> __RISCV_NTLH_INNERMOST_PRIVATE
21661 // 3 -> __RISCV_NTLH_ALL_PRIVATE
21662 // 4 -> __RISCV_NTLH_INNERMOST_SHARED
21663 // 5 -> __RISCV_NTLH_ALL
21664 int NontemporalLevel = 5;
21665 const MDNode *RISCVNontemporalInfo =
21666 I.getMetadata("riscv-nontemporal-domain");
21667 if (RISCVNontemporalInfo != nullptr)
21668 NontemporalLevel =
21670 cast<ConstantAsMetadata>(RISCVNontemporalInfo->getOperand(0))
21671 ->getValue())
21672 ->getZExtValue();
21673
21674 assert((1 <= NontemporalLevel && NontemporalLevel <= 5) &&
21675 "RISC-V target doesn't support this non-temporal domain.");
21676
21677 NontemporalLevel -= 2;
21679 if (NontemporalLevel & 0b1)
21680 Flags |= MONontemporalBit0;
21681 if (NontemporalLevel & 0b10)
21682 Flags |= MONontemporalBit1;
21683
21684 return Flags;
21685}
21686
21689
21690 MachineMemOperand::Flags NodeFlags = Node.getMemOperand()->getFlags();
21692 TargetFlags |= (NodeFlags & MONontemporalBit0);
21693 TargetFlags |= (NodeFlags & MONontemporalBit1);
21694 return TargetFlags;
21695}
21696
21698 const MemSDNode &NodeX, const MemSDNode &NodeY) const {
21699 return getTargetMMOFlags(NodeX) == getTargetMMOFlags(NodeY);
21700}
21701
21703 if (VT.isScalableVector())
21704 return isTypeLegal(VT) && Subtarget.hasStdExtZvbb();
21705 if (VT.isFixedLengthVector() && Subtarget.hasStdExtZvbb())
21706 return true;
21707 return Subtarget.hasStdExtZbb() &&
21708 (VT == MVT::i32 || VT == MVT::i64 || VT.isFixedLengthVector());
21709}
21710
21712 ISD::CondCode Cond) const {
21713 return isCtpopFast(VT) ? 0 : 1;
21714}
21715
21717
// NOTE(review): signature line lost in extraction; from the body this is the
// fallBackToDAGISel-style predicate: return true when GlobalISel should hand
// the instruction back to SelectionDAG (currently anything scalable-typed
// outside the supported opcode list).
 21718 // GISel support is in progress or complete for these opcodes.
 21719 unsigned Op = Inst.getOpcode();
 21720 if (Op == Instruction::Add || Op == Instruction::Sub ||
 21721 Op == Instruction::And || Op == Instruction::Or ||
 21722 Op == Instruction::Xor || Op == Instruction::InsertElement ||
 21723 Op == Instruction::ShuffleVector || Op == Instruction::Load ||
 21724 Op == Instruction::Freeze || Op == Instruction::Store)
 21725 return false;
 21726
 21727 if (Inst.getType()->isScalableTy())
 21728 return true;
 21729
// Any scalable-typed operand forces the fallback, except on returns.
 21730 for (unsigned i = 0; i < Inst.getNumOperands(); ++i)
 21731 if (Inst.getOperand(i)->getType()->isScalableTy() &&
 21732 !isa<ReturnInst>(&Inst))
 21733 return true;
 21734
 21735 if (const AllocaInst *AI = dyn_cast<AllocaInst>(&Inst)) {
 21736 if (AI->getAllocatedType()->isScalableTy())
 21737 return true;
 21738 }
 21739
 21740 return false;
 21741}
21742
// Custom SDIV-by-power-of-2 lowering: usable only with the short-forward-
// branch optimization, for i32/i64(RV64) and small divisors, via the common
// CMov-based expansion.
 21743 SDValue
 21744 RISCVTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
 21745 SelectionDAG &DAG,
 21746 SmallVectorImpl<SDNode *> &Created) const {
// NOTE(review): the declaration of Attr (the function's attribute list) was
// lost in extraction here (numbering jumps 21746 -> 21748); restore it from
// upstream before editing this region.
 21748 if (isIntDivCheap(N->getValueType(0), Attr))
 21749 return SDValue(N, 0); // Lower SDIV as SDIV
 21750
 21751 // Only perform this transform if short forward branch opt is supported.
 21752 if (!Subtarget.hasShortForwardBranchOpt())
 21753 return SDValue();
 21754 EVT VT = N->getValueType(0);
 21755 if (!(VT == MVT::i32 || (VT == MVT::i64 && Subtarget.is64Bit())))
 21756 return SDValue();
 21757
 21758 // Ensure 2**k-1 < 2048 so that we can just emit a single addi/addiw.
 21759 if (Divisor.sgt(2048) || Divisor.slt(-2048))
 21760 return SDValue();
 21761 return TargetLowering::buildSDIVPow2WithCMov(N, Divisor, DAG, Created);
 21762}
21763
21764bool RISCVTargetLowering::shouldFoldSelectWithSingleBitTest(
21765 EVT VT, const APInt &AndMask) const {
21766 if (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())
21767 return !Subtarget.hasStdExtZbs() && AndMask.ugt(1024);
21769}
21770
21771unsigned RISCVTargetLowering::getMinimumJumpTableEntries() const {
21772 return Subtarget.getMinimumJumpTableEntries();
21773}
21774
21777 int JTI,
21778 SelectionDAG &DAG) const {
21779 if (Subtarget.hasStdExtZicfilp()) {
21780 // When Zicfilp enabled, we need to use software guarded branch for jump
21781 // table branch.
21782 SDValue JTInfo = DAG.getJumpTableDebugInfo(JTI, Value, dl);
21783 return DAG.getNode(RISCVISD::SW_GUARDED_BRIND, dl, MVT::Other, JTInfo,
21784 Addr);
21785 }
21786 return TargetLowering::expandIndirectJTBranch(dl, Value, Addr, JTI, DAG);
21787}
21788
21790
21791#define GET_RISCVVIntrinsicsTable_IMPL
21792#include "RISCVGenSearchableTables.inc"
21793
21794} // namespace llvm::RISCVVIntrinsicsTable
unsigned const MachineRegisterInfo * MRI
static MCRegister MatchRegisterName(StringRef Name)
static EVT getContainerForFixedLengthVector(SelectionDAG &DAG, EVT VT)
static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget, const AArch64TargetLowering &TLI)
return SDValue()
static SDValue performANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue performSETCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue convertToScalableVector(SelectionDAG &DAG, EVT VT, SDValue V)
static SDValue convertFromScalableVector(SelectionDAG &DAG, EVT VT, SDValue V)
SmallVector< AArch64_IMM::ImmInsnModel, 4 > Insn
#define NODE_NAME_CASE(node)
static bool isConstant(const MachineInstr &MI)
amdgpu AMDGPU Register Bank Select
static bool isZeroOrAllOnes(SDValue N, bool AllOnes)
static SDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes, TargetLowering::DAGCombinerInfo &DCI)
static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, TargetLowering::DAGCombinerInfo &DCI, bool AllOnes=false)
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static MCRegister MatchRegisterAltName(StringRef Name)
Maps from the set of all alternative registernames to a register number.
Function Alias Analysis Results
static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG, unsigned Flags)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static MachineBasicBlock * emitSelectPseudo(MachineInstr &MI, MachineBasicBlock *BB, unsigned Opcode)
static SDValue unpackFromRegLoc(const CSKYSubtarget &Subtarget, SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
#define LLVM_DEBUG(X)
Definition Debug.h:101
uint64_t Align
uint64_t Addr
uint64_t Size
bool End
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
#define Check(C,...)
#define im(i)
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
#define RegName(no)
static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG)
static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG)
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp, unsigned ExtOpc=ISD::ANY_EXTEND)
static Intrinsic::ID getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen, AtomicRMWInst::BinOp BinOp)
static bool isSplat(Value *V)
Return true if V is a splat of a value (which is used when multiplying a matrix with a scalar).
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
#define G(x, y, z)
Definition MD5.cpp:56
mir Rename Register Operands
unsigned const TargetRegisterInfo * TRI
unsigned Reg
Promote Memory to Register
Definition Mem2Reg.cpp:110
This file provides utility analysis objects describing memory locations.
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG, const TargetLowering::DAGCombinerInfo &DCI, const MipsSETargetLowering *TL, const MipsSubtarget &Subtarget)
static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG, const MipsSubtarget &Subtarget)
static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
uint64_t IntrinsicInst * II
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
static StringRef getExtensionType(StringRef Ext)
static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue SplitVectorReductionOp(SDValue Op, SelectionDAG &DAG)
static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitBuildPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitQuietFCMP(MachineInstr &MI, MachineBasicBlock *BB, unsigned RelOpcode, unsigned EqOpcode, const RISCVSubtarget &Subtarget)
static int isElementRotate(int &LoSrc, int &HiSrc, ArrayRef< int > Mask)
Match shuffles that concatenate two vectors, rotate the concatenation, and then extract the original ...
static const Intrinsic::ID FixedVlsegIntrIds[]
static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static MVT getLMUL1VT(MVT VT)
static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
Match v(f)slide1up/down idioms.
static bool hasPassthruOp(unsigned Opcode)
Return true if a RISC-V target specified op has a passthru operand.
static SDValue combineTruncToVnclip(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::optional< APInt > getExactInteger(const APFloat &APF, uint32_t BitWidth)
static bool isInterleaveShuffle(ArrayRef< int > Mask, MVT VT, int &EvenSrc, int &OddSrc, const RISCVSubtarget &Subtarget)
Is this shuffle interleaving contiguous elements from one vector into the even elements and contiguou...
static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &DAG)
According to the property that indexed load/store instructions zero-extend their indices,...
static unsigned getPACKOpcode(unsigned DestBW, const RISCVSubtarget &Subtarget)
static void promoteVCIXScalar(const SDValue &Op, SmallVectorImpl< SDValue > &Operands, SelectionDAG &DAG)
static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru, SDValue Scalar, SDValue VL, SelectionDAG &DAG)
static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode)
static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru, SDValue Lo, SDValue Hi, SDValue VL, SelectionDAG &DAG)
static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL, SelectionDAG &DAG)
Creates an all ones mask suitable for masking a vector of type VecTy with vector length VL.
static cl::opt< int > FPImmCost(DEBUG_TYPE "-fpimm-cost", cl::Hidden, cl::desc("Give the maximum number of instructions that we will " "use for creating a floating-point immediate value"), cl::init(2))
static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static const RISCV::RISCVMaskedPseudoInfo * lookupMaskedIntrinsic(uint16_t MCOpcode, RISCVII::VLMUL LMul, unsigned SEW)
static SDValue expandMul(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue performVWADDSUBW_VLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask, Align BaseAlign, const RISCVSubtarget &ST)
Match the index of a gather or scatter operation as an operation with twice the element width and hal...
static bool isLegalBitRotate(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, MVT &RotateVT, unsigned &RotateAmt)
static SDValue combineOp_VLToVWOp_VL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
Combine a binary or FMA operation to its equivalent VW or VW_W form.
static SDValue combineVFMADD_VLWithVFNEG_VL(SDNode *N, SelectionDAG &DAG)
static SDValue combineOrOfCZERO(SDNode *N, SDValue N0, SDValue N1, SelectionDAG &DAG)
static SDValue useInversedSetcc(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineVWADDSUBWSelect(SDNode *N, SelectionDAG &DAG)
static MachineBasicBlock * EmitLoweredCascadedSelect(MachineInstr &First, MachineInstr &Second, MachineBasicBlock *ThisMBB, const RISCVSubtarget &Subtarget)
static SDValue performINSERT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue lowerFABSorFNEG(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue SplitStrictFPVectorOp(SDValue Op, SelectionDAG &DAG)
static SDValue tryDemorganOfBooleanCondition(SDValue Cond, SelectionDAG &DAG)
static SDValue performMemPairCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG)
static bool isDeinterleaveShuffle(MVT VT, MVT ContainerVT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static unsigned getRVVReductionOp(unsigned ISDOpcode)
static SDValue combineSubShiftToOrcB(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::optional< bool > matchSetCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue Val)
static SDValue lowerShuffleViaVRegSplitting(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue getVCIXISDNodeVOID(SDValue &Op, SelectionDAG &DAG, unsigned Type)
static SDValue lowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static cl::opt< unsigned > NumRepeatedDivisors(DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden, cl::desc("Set the minimum number of repetitions of a divisor to allow " "transformation to multiplications by the reciprocal"), cl::init(2))
static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG)
static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineVectorMulToSraBitcast(SDNode *N, SelectionDAG &DAG)
static bool hasMaskOp(unsigned Opcode)
Return true if a RISC-V target specified op has a mask operand.
static bool legalizeScatterGatherIndexType(SDLoc DL, SDValue &Index, ISD::MemIndexType &IndexType, RISCVTargetLowering::DAGCombinerInfo &DCI)
static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static unsigned getRISCVVLOp(SDValue Op)
Get a RISC-V target specified VL op for a given SDNode.
static unsigned getVecReduceOpcode(unsigned Opc)
Given a binary operator, return the associative generic ISD::VECREDUCE_OP which corresponds to it.
static std::pair< SDValue, SDValue > getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isPromotedOpNeedingSplit(SDValue Op, const RISCVSubtarget &Subtarget)
static SDValue performFP_TO_INT_SATCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT, SDValue StartValue, SDValue Vec, SDValue Mask, SDValue VL, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Helper to lower a reduction sequence of the form: scalar = reduce_op vec, scalar_start.
static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::pair< SDValue, SDValue > getDefaultScalableVLOps(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue getVLOperand(SDValue Op)
static MachineBasicBlock * emitFROUND(MachineInstr &MI, MachineBasicBlock *MBB, const RISCVSubtarget &Subtarget)
static SDValue getLargeExternalSymbol(ExternalSymbolSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG)
static SDValue lowerCttzElts(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue performSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerVectorXRINT(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static cl::opt< unsigned > ExtensionMaxWebSize(DEBUG_TYPE "-ext-max-web-size", cl::Hidden, cl::desc("Give the maximum size (in number of nodes) of the web of " "instructions that we will consider for VW expansion"), cl::init(18))
static SDValue combineBinOpOfZExt(SDNode *N, SelectionDAG &DAG)
static bool isSelectPseudo(MachineInstr &MI)
static std::optional< MVT > getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool useRVVForFixedLengthVectorVT(MVT VT, const RISCVSubtarget &Subtarget)
static Value * useTpOffset(IRBuilderBase &IRB, unsigned Offset)
static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG)
static SDValue combineTruncOfSraSext(SDNode *N, SelectionDAG &DAG)
static MachineBasicBlock * emitSplitF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI, MachineBasicBlock *BB, unsigned CVTXOpc)
static SDValue SplitVectorOp(SDValue Op, SelectionDAG &DAG)
static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc)
static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG, SDValue TrueVal, SDValue FalseVal, bool Swapped)
#define VP_CASE(NODE)
static SDValue lowerBitreverseShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool matchIndexAsShuffle(EVT VT, SDValue Index, SDValue Mask, SmallVector< int > &ShuffleMask)
Match the index vector of a scatter or gather node as the shuffle mask which performs the rearrangeme...
static SDValue performVFMADD_VLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue SplitVPOp(SDValue Op, SelectionDAG &DAG)
static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static void processVCIXOperands(SDValue &OrigOp, SmallVectorImpl< SDValue > &Operands, SelectionDAG &DAG)
static SDValue widenVectorOpsToi8(SDValue N, const SDLoc &DL, SelectionDAG &DAG)
static SDValue lowerINT_TO_FP(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::optional< VIDSequence > isSimpleVIDSequence(SDValue Op, unsigned EltSizeInBits)
static SDValue getDeinterleaveViaVNSRL(const SDLoc &DL, MVT VT, SDValue Src, bool EvenElts, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static SDValue getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL, EVT VT, SDValue Passthru, SDValue Op, SDValue Offset, SDValue Mask, SDValue VL, unsigned Policy=RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED)
static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC)
static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static RISCVFPRndMode::RoundingMode matchRoundingOp(unsigned Opc)
static SDValue lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineTruncSelectToSMaxUSat(SDNode *N, SelectionDAG &DAG)
static SDValue performBITREVERSECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineSubOfBoolean(SDNode *N, SelectionDAG &DAG)
static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isValidEGW(int EGS, EVT VT, const RISCVSubtarget &Subtarget)
static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isNonZeroAVL(SDValue AVL)
#define DEBUG_TYPE
static SDValue lowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static SDValue getVCIXISDNodeWCHAIN(SDValue &Op, SelectionDAG &DAG, unsigned Type)
static SDValue getLargeGlobalAddress(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG)
static MachineBasicBlock * emitReadCounterWidePseudo(MachineInstr &MI, MachineBasicBlock *BB)
static cl::opt< bool > AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden, cl::desc("Allow the formation of VW_W operations (e.g., " "VWADD_W) with splat constants"), cl::init(false))
static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const CCValAssign &HiVA, const SDLoc &DL)
static SDValue tryMemPairCombine(SelectionDAG &DAG, LSBaseSDNode *LSNode1, LSBaseSDNode *LSNode2, SDValue BasePtr, uint64_t Imm)
static std::tuple< unsigned, SDValue, SDValue > getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT, const RISCVSubtarget &Subtarget)
static SDValue performFP_TO_INTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue combineBinOpOfExtractToReduceTree(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Perform two related transforms whose purpose is to incrementally recognize an explode_vector followed...
static SDValue lowerBuildVectorViaPacking(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Double the element size of the build vector to reduce the number of vslide1down in the build vector c...
static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerBuildVectorViaDominantValues(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Try and optimize BUILD_VECTORs with "dominant values" - these are values which constitute a large pro...
static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS, ISD::CondCode &CC, SelectionDAG &DAG)
static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
If we have a build_vector where each lane is binop X, C, where C is a constant (but not necessarily t...
#define OP_CASE(NODE)
static const Intrinsic::ID FixedVssegIntrIds[]
static SDValue getVSlidedown(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL, EVT VT, SDValue Passthru, SDValue Op, SDValue Offset, SDValue Mask, SDValue VL, unsigned Policy=RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED)
static LLT getMaskTypeFor(LLT VecTy)
Return the type of the mask type suitable for masking the provided vector type.
const SmallVectorImpl< MachineOperand > & Cond
const MachineOperand & RHS
return LHS getImm()<
#define ROTR(x, n)
Definition SHA256.cpp:32
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static Type * getValueType(T *V)
Returns the type of the given value/instruction V.
static bool isCommutative(Instruction *I)
#define ROTL(x, b)
Definition SipHash.cpp:32
This file defines the SmallSet class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:166
static SymbolRef::Type getType(const Symbol *Sym)
Definition TapiFile.cpp:40
static constexpr int Concat[]
Value * LHS
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition APFloat.h:1249
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition APFloat.h:1241
static APFloat getNaN(const fltSemantics &Sem, bool Negative=false, uint64_t payload=0)
Factory for NaN values.
Definition APFloat.h:1021
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:222
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1513
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition APInt.h:1379
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition APInt.h:1485
APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:910
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition APInt.h:1323
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition APInt.h:1194
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:364
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition APInt.h:1175
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:373
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition APInt.h:202
bool isNegative() const
Determine sign of this APInt.
Definition APInt.h:322
APInt sdiv(const APInt &RHS) const
Signed division function for APInt.
Definition APInt.cpp:1619
void clearAllBits()
Set every bit to 0.
Definition APInt.h:1390
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition APInt.h:428
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition APInt.cpp:624
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:212
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition APInt.h:1504
void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition APInt.cpp:370
APInt srem(const APInt &RHS) const
Function for signed remainder operation.
Definition APInt.cpp:1711
bool isMask(unsigned numBits) const
Definition APInt.h:481
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition APInt.h:327
APInt sext(unsigned width) const
Sign extend to a new width.
Definition APInt.cpp:959
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1250
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition APInt.h:433
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:299
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition APInt.h:1123
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition APInt.h:289
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
Definition APInt.h:1382
APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
Definition APInt.cpp:455
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition APInt.h:279
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition APInt.h:232
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1535
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition APInt.h:1214
An arbitrary precision integer that knows its signedness.
Definition APSInt.h:23
an instruction to allocate memory on the stack
This class represents an incoming formal argument to a Function.
Definition Argument.h:31
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
Definition ArrayRef.h:165
An instruction that atomically checks whether a specified value is in a memory location,...
an instruction that atomically reads a memory location, combines it with another value,...
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ Add
*p = old + v
@ USubCond
Subtract only if no unsigned overflow.
@ Min
*p = old <signed v ? old : v
@ Sub
*p = old - v
@ And
*p = old & v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ UMax
*p = old >unsigned v ? old : v
@ UDecWrap
Decrement one until a minimum value or zero.
@ Nand
*p = ~(old & v)
bool isFloatingPointOperation() const
BinOp getOperation() const
AtomicOrdering getOrdering() const
Returns the ordering constraint of this rmw instruction.
This class holds the attributes for a function, its return value, and its parameters.
Definition Attributes.h:468
bool hasFnAttr(Attribute::AttrKind Kind) const
Return true if the attribute exists for the function.
StringRef getValueAsString() const
Return the attribute's value as a string.
static BaseIndexOffset match(const SDNode *N, const SelectionDAG &DAG)
Parses tree in N for base, index, offset addresses.
LLVM Basic Block Representation.
Definition BasicBlock.h:61
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:219
bool test(unsigned Idx) const
Definition BitVector.h:461
BitVector & set()
Definition BitVector.h:351
bool all() const
all - Returns true if all bits are set.
Definition BitVector.h:175
CCState - This class holds information needed while lowering arguments and return values.
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed v...
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
CCValAssign - Represent assignment of one arg/retval to a location.
Register getLocReg() const
LocInfo getLocInfo() const
bool needsCustom() const
int64_t getLocMemOffset() const
bool isMustTailCall() const
Tests if this call site must be tail call optimized.
bool isIndirectCall() const
Return true if the callsite is an indirect call.
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
This is the shared class of boolean and integer constants.
Definition Constants.h:83
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition Constants.h:220
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition Constants.h:208
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:157
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
This is an important base class in LLVM.
Definition Constant.h:42
static Constant * getAllOnesValue(Type *Ty)
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:63
unsigned getPointerSizeInBits(unsigned AS=0) const
Layout pointer size, in bits FIXME: The defaults need to be removed once all of the backends/clients ...
Definition DataLayout.h:361
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
A debug info location.
Definition DebugLoc.h:33
unsigned size() const
Definition DenseMap.h:99
Implements a dense probed hash-table based set.
Definition DenseSet.h:278
Diagnostic information for unsupported feature in backend.
static constexpr ElementCount getScalable(ScalarTy MinVal)
Definition TypeSize.h:314
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition TypeSize.h:311
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition Type.cpp:689
Class to represent function types.
Type * getParamType(unsigned i) const
Parameter type accessors.
Type * getReturnType() const
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition Function.h:216
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition Function.cpp:773
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:701
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition Function.h:277
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition Function.h:353
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:380
Argument * getArg(unsigned i) const
Definition Function.h:883
Helper struct to store a base, index and offset that forms an address.
bool isDSOLocal() const
bool hasExternalWeakLinkage() const
Module * getParent()
Get the module that this global value is contained inside of...
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
Store the specified register of the given register class to the specified stack frame index.
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
Load the specified register of the given register class from the specified stack frame index.
Common base class shared among various IRBuilders.
Definition IRBuilder.h:91
Value * CreateConstGEP1_32(Type *Ty, Value *Ptr, unsigned Idx0, const Twine &Name="")
Definition IRBuilder.h:1902
BasicBlock * GetInsertBlock() const
Definition IRBuilder.h:171
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition IRBuilder.h:2439
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Definition IRBuilder.h:513
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2692
static InstructionCost getInvalid(CostType Val=0)
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Class to represent integer types.
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:67
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
Base class for LoadSDNode and StoreSDNode.
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
An instruction for reading from memory.
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Value * getPointerOperand()
bool isSimple() const
Align getAlign() const
Return the alignment of the access that is being performed.
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
Context object for machine code objects.
Definition MCContext.h:83
Base class for the full range of assembler expressions which are needed for parsing.
Definition MCExpr.h:34
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
Definition MCExpr.h:394
Metadata node.
Definition Metadata.h:1069
const MDOperand & getOperand(unsigned I) const
Definition Metadata.h:1430
Machine Value Type.
static MVT getFloatingPointVT(unsigned BitWidth)
static auto integer_fixedlen_vector_valuetypes()
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
bool isRISCVVectorTuple() const
Return true if this is a RISCV vector tuple type where the runtime length is machine dependent.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
MVT changeVectorElementType(MVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
bool bitsLE(MVT VT) const
Return true if this has no more bits than VT.
unsigned getVectorNumElements() const
static MVT getRISCVVectorTupleVT(unsigned Sz, unsigned NFields)
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
static MVT getScalableVectorVT(MVT VT, unsigned NumElements)
unsigned getRISCVVectorTupleNumFields() const
Given a RISC-V vector tuple type, return the num_fields.
MVT changeTypeToInteger()
Return the type converted to an equivalently sized integer or vector with integer element type.
bool bitsLT(MVT VT) const
Return true if this has less bits than VT.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
bool isPow2VectorType() const
Returns true if the given vector is a power of 2.
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
const fltSemantics & getFltSemantics() const
Returns an APFloat semantics tag appropriate for the value type.
bool bitsGT(MVT VT) const
Return true if this has more bits than VT.
bool isFixedLengthVector() const
ElementCount getVectorElementCount() const
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool bitsGE(MVT VT) const
Return true if this has no less bits than VT.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
bool isValid() const
Return true if this is a valid simple valuetype.
static MVT getIntegerVT(unsigned BitWidth)
MVT getDoubleNumVectorElementsVT() const
MVT getHalfNumVectorElementsVT() const
Return a VT for a vector type with the same element type but half the number of elements.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
static auto integer_scalable_vector_valuetypes()
MVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
static auto fp_fixedlen_vector_valuetypes()
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
void push_back(MachineInstr *MI)
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
Instructions::iterator instr_iterator
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineInstr - Allocate a new MachineInstr.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
Representation of each machine instruction.
void collectDebugValues(SmallVectorImpl< MachineInstr * > &DbgValues)
Scan instructions immediately following MI and collect any matching DBG_VALUEs.
void setFlag(MIFlag Flag)
Set a MI flag.
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
@ EK_Custom32
EK_Custom32 - Each entry is a 32-bit value that is custom lowered by the TargetLowering::LowerCustomJ...
A description of a memory reference used in the backend.
const MDNode * getRanges() const
Return the range tag for the memory reference.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MONonTemporal
The memory access is non-temporal.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
Flags getFlags() const
Return the raw flags of the source value.
AAMDNodes getAAInfo() const
Return the AA tags for the memory reference.
Align getBaseAlign() const
Return the minimum known alignment in bytes of the base address, without the offset.
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateImm(int64_t Val)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
This is an abstract virtual class for memory operations.
bool isSimple() const
Returns true if the memory operation is neither atomic nor volatile.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:65
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A RISCV-specific constant pool value.
static RISCVConstantPoolValue * Create(const GlobalValue *GV)
RISCVMachineFunctionInfo - This class is derived from MachineFunctionInfo and contains private RISCV-...
RISCVABI::ABI getTargetABI() const
unsigned getMinimumJumpTableEntries() const
bool hasStdExtCOrZca() const
unsigned getMaxLMULForFixedLengthVectors() const
bool hasVInstructionsI64() const
bool hasVInstructionsF64() const
bool hasStdExtDOrZdinx() const
bool hasStdExtZfhOrZhinx() const
unsigned getRealMinVLen() const
Quantity expandVScale(Quantity X) const
If the ElementCount or TypeSize X is scalable and VScale (VLEN) is exactly known, returns X converted...
bool useRVVForFixedLengthVectors() const
bool isTargetFuchsia() const
bool hasVInstructionsBF16Minimal() const
unsigned getDLenFactor() const
bool hasVInstructionsF16Minimal() const
unsigned getXLen() const
bool hasConditionalMoveFusion() const
bool isRegisterReservedByUser(Register i) const
bool hasVInstructionsF16() const
unsigned getMaxBuildIntsCost() const
Align getPrefLoopAlignment() const
bool hasVInstructions() const
std::optional< unsigned > getRealVLen() const
bool useConstantPoolForLargeInts() const
Align getPrefFunctionAlignment() const
bool hasStdExtZfhminOrZhinxmin() const
unsigned getRealMaxVLen() const
const RISCVRegisterInfo * getRegisterInfo() const override
const RISCVInstrInfo * getInstrInfo() const override
const RISCVTargetLowering * getTargetLowering() const override
bool hasVInstructionsF32() const
unsigned getELen() const
bool isTargetAndroid() const
bool hasStdExtFOrZfinx() const
unsigned getFLen() const
static std::pair< unsigned, unsigned > computeVLMAXBounds(MVT ContainerVT, const RISCVSubtarget &Subtarget)
static std::pair< unsigned, unsigned > decomposeSubvectorInsertExtractToSubRegs(MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx, const RISCVRegisterInfo *TRI)
InstructionCost getVRGatherVVCost(MVT VT) const
Return the cost of a vrgather.vv instruction for the type VT.
bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const
static unsigned getSubregIndexByMVT(MVT VT, unsigned Index)
Value * getIRStackGuard(IRBuilderBase &IRB) const override
If the target has a standard location for the stack protector cookie, returns the address of that loc...
bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override
Should we generate fp_to_si_sat and fp_to_ui_sat from type FPVT to type VT from min(max(fptoi)) satur...
bool shouldSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const override
Check if sinking I's operands to I's basic block is profitable, because the operands can be folded in...
InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const override
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT) const override
Return true if pulling a binary operation into a select with an identity constant is profitable.
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able to emit the call instruction as a tail call.
RISCVTargetLowering(const TargetMachine &TM, const RISCVSubtarget &STI)
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
Instruction * emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
bool isTruncateFree(Type *SrcTy, Type *DstTy) const override
Return true if it's free to truncate a value of type FromTy to type ToTy.
bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const override
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes) const override
Returns the target specific optimal type for load and store operations as a result of memset,...
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Returns true if the target allows unaligned memory accesses of the specified type.
const Constant * getTargetConstantFromLoad(LoadSDNode *LD) const override
This method returns the constant pool value that will be loaded by LD.
const RISCVSubtarget & getSubtarget() const
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool preferScalarizeSplat(SDNode *N) const override
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
bool canSplatOperand(Instruction *I, int Operand) const
Return true if the (vector) instruction I will be lowered to an instruction with a scalar splat opera...
bool shouldExtendTypeInLibCall(EVT Type) const override
Returns true if arguments should be extended in lib calls.
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
const MCExpr * LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB, unsigned uid, MCContext &Ctx) const override
InstructionCost getVRGatherVICost(MVT VT) const
Return the cost of a vrgather.vi (or vx) instruction for the type VT.
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Return true if it is beneficial to convert a load of a constant to just the constant itself.
bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const override
bool shouldExpandBuildVectorWithShuffles(EVT VT, unsigned DefinedValues) const override
MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Return the register type for a given MVT, ensuring vectors are treated as a series of gpr sized integ...
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
bool hasAndNotCompare(SDValue Y) const override
Return true if the target should transform: (X & Y) == Y ---> (~X & Y) == 0 (X & Y) !...
bool shouldScalarizeBinop(SDValue VecOp) const override
Try to convert an extract element of a vector binary operation into an extract element followed by a ...
bool lowerInterleaveIntrinsicToStore(IntrinsicInst *II, StoreInst *SI, SmallVectorImpl< Instruction * > &DeadInsts) const override
Lower an interleave intrinsic to a target specific store intrinsic.
bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const override
Return true if it is profitable to move this shift by a constant amount through its operand,...
bool areTwoSDNodeTargetMMOFlagsMergeable(const MemSDNode &NodeX, const MemSDNode &NodeY) const override
Return true if it is valid to merge the TargetMMOFlags in two SDNodes.
bool hasBitTest(SDValue X, SDValue Y) const override
Return true if the target has a bit-test instruction: (X & (1 << Y)) ==/!= 0 This knowledge can be us...
static unsigned computeVLMAX(unsigned VectorBits, unsigned EltSize, unsigned MinSize)
bool shouldExpandCttzElements(EVT VT) const override
Return true if the @llvm.experimental.cttz.elts intrinsic should be expanded using generic code in Se...
bool isCheapToSpeculateCtlz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
InstructionCost getLMULCost(MVT VT) const
Return the cost of LMUL for linear operations.
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
bool isMulAddWithConstProfitable(SDValue AddNode, SDValue ConstNode) const override
Return true if it may be profitable to transform (mul (add x, c1), c2) -> (add (mul x,...
InstructionCost getVSlideVICost(MVT VT) const
Return the cost of a vslidedown.vi or vslideup.vi instruction for the type VT.
bool fallBackToDAGISel(const Instruction &Inst) const override
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the ValueType of the result of SETCC operations.
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
bool lowerInterleavedLoad(LoadInst *LI, ArrayRef< ShuffleVectorInst * > Shuffles, ArrayRef< unsigned > Indices, unsigned Factor) const override
Lower an interleaved load into a vlsegN intrinsic.
bool isCtpopFast(EVT VT) const override
Return true if ctpop instruction is fast.
unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
This method can be implemented by targets that want to expose additional information about sign bits ...
MVT getContainerForFixedLengthVector(MVT VT) const
static unsigned getRegClassIDForVecVT(MVT VT)
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const override
Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type from this source type with ...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
MachineMemOperand::Flags getTargetMMOFlags(const Instruction &I) const override
This callback is used to inspect load/store instructions and add target-specific MachineMemOperand fl...
SDValue computeVLMax(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG) const
bool signExtendConstant(const ConstantInt *CI) const override
Return true if this constant should be sign extended when promoting to a larger type.
bool lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *II, LoadInst *LI, SmallVectorImpl< Instruction * > &DeadInsts) const override
Lower a deinterleave intrinsic to a target specific load intrinsic.
bool shouldTransformSignedTruncationCheck(EVT XVT, unsigned KeptBits) const override
Should we transform the IR-optimal check for whether given truncation down into KeptBits would be trun...
bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, unsigned OldShiftOpcode, unsigned NewShiftOpcode, SelectionDAG &DAG) const override
Given the pattern (X & (C l>>/<< Y)) ==/!= 0 return true if it should be transformed into: ((X <</l>>...
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Returns the register with the specified architectural or ABI name.
InstructionCost getVSlideVXCost(MVT VT) const
Return the cost of a vslidedown.vx or vslideup.vx instruction for the type VT.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
static unsigned getRegClassIDForLMUL(RISCVII::VLMUL LMul)
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override
Return true if result of the specified node is used by a return node only.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const override
Returns true if arguments should be sign-extended in lib calls.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
unsigned getCustomCtpopCost(EVT VT, ISD::CondCode Cond) const override
Return the maximum number of "x & (x - 1)" operations that can be done instead of deferring to a cust...
void AdjustInstrPostInstrSelection(MachineInstr &MI, SDNode *Node) const override
This method should be implemented by targets that mark instructions with the 'hasPostISelHook' flag.
bool isShuffleMaskLegal(ArrayRef< int > M, EVT VT) const override
Return true if the given shuffle mask can be codegen'd directly, or if it should be stack expanded.
bool isCheapToSpeculateCttz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic cttz.
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
ISD::NodeType getExtendForAtomicCmpSwapArg() const override
Returns how the platform's atomic compare and swap expects its comparison value to be extended (ZERO_...
bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI, unsigned Factor) const override
Lower an interleaved store into a vssegN intrinsic.
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const override
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
bool isLegalElementTypeForRVV(EVT ScalarTy) const
bool isVScaleKnownToBeAPowerOfTwo() const override
Return true only if vscale must be a power of two.
static RISCVII::VLMUL getLMUL(MVT VT)
int getLegalZfaFPImm(const APFloat &Imm, EVT VT) const
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
Lower the specified operand into the Ops vector.
bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, std::optional< CallingConv::ID > CC) const override
Target-specific splitting of values into parts that fit a register storing a legal type.
Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Return the number of registers for a given MVT, ensuring vectors are treated as a series of gpr sized...
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint letter, return the type of constraint it is for this target.
MachineInstr * EmitKCFICheck(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator &MBBI, const TargetInstrInfo *TII) const override
bool isLegalInterleavedAccessType(VectorType *VTy, unsigned Factor, Align Alignment, unsigned AddrSpace, const DataLayout &) const
Returns whether or not generating an interleaved load/store intrinsic for this type will be legal.
bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const override
Return true if Op can create undef or poison from non-undef & non-poison operands.
bool isIntDivCheap(EVT VT, AttributeList Attr) const override
Return true if integer divide is usually cheaper than a sequence of several shifts,...
SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, int JTI, SelectionDAG &DAG) const override
Expands target specific indirect branch for the case of JumpTable expansion.
bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
Returns true by value, base pointer and offset pointer and addressing mode by reference if this node ...
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
Returns true by value, base pointer and offset pointer and addressing mode by reference if the node's...
SDValue joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, std::optional< CallingConv::ID > CC) const override
Target-specific combining of register parts into its original value.
bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override
Return if the target supports combining a chain like:
bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
bool isLegalStridedLoadStore(EVT DataType, Align Alignment) const
Return true if a stride load store of the given result type and alignment is legal.
SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
Wrapper class representing virtual and physical registers.
Definition Register.h:19
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
This class provides iterator support for SDUse operands that use a specific SDNode.
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
const SDValue & getOperand(unsigned Num) const
bool isTargetStrictFPOpcode() const
Test if this node has a target-specific opcode that may raise FP exceptions (in the <target>ISD names...
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
void setCFIType(uint32_t Type)
bool isUndef() const
Return true if the type of the node is undefined.
op_iterator op_end() const
op_iterator op_begin() const
static use_iterator use_end()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
unsigned getNumOperands() const
static ScalableVectorType * get(Type *ElementType, unsigned MinNumElts)
Definition Type.cpp:710
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const
Get the upper bound on bit size for this Value Op as a signed integer.
SDValue getMaskedGather(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, ISD::LoadExtType ExtTy)
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDValue getNeutralElement(unsigned Opcode, const SDLoc &DL, EVT VT, SDNodeFlags Flags)
Get the (commutative) neutral element for the given opcode, if it exists.
SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm, bool ConstantFold=true)
Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), AAResults *AA=nullptr)
SDValue getStridedLoadVP(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &DL, SDValue Chain, SDValue Ptr, SDValue Offset, SDValue Stride, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, bool IsExpanding=false)
SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
SDValue getJumpTableDebugInfo(int JTI, SDValue Chain, const SDLoc &DL)
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
bool isSafeToSpeculativelyExecute(unsigned Opcode) const
Some opcodes may create immediate undefined behavior when used with some values (integer division-by-...
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
SDValue getRegister(Register Reg, EVT VT)
SDValue getElementCount(const SDLoc &DL, EVT VT, ElementCount EC, bool ConstantFold=true)
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getStepVector(const SDLoc &DL, EVT ResVT, const APInt &StepVal)
Returns a vector of type ResVT whose elements contain the linear sequence <0, Step,...
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
bool shouldOptForSize() const
std::pair< SDValue, SDValue > SplitVectorOperand(const SDNode *N, unsigned OpNo)
Split the node's operand with EXTRACT_SUBVECTOR and return the low/high part.
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
SDValue getVPZExtOrTrunc(const SDLoc &DL, EVT VT, SDValue Op, SDValue Mask, SDValue EVL)
Convert a vector-predicated Op, which must be an integer vector, to the vector-type VT,...
const TargetLowering & getTargetLoweringInfo() const
bool NewNodesMustHaveLegalTypes
When true, additional steps are taken to ensure that getConstant() and similar functions return DAG n...
std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getGatherVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
SDValue getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, SDValue Offset, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
SDValue getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Base, SDValue Offset, SDValue Mask, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
std::pair< SDValue, SDValue > getStrictFPExtendOrRound(SDValue Op, SDValue Chain, const SDLoc &DL, EVT VT)
Convert Op, which must be a STRICT operation of float type, to the float type VT, by either extending...
std::pair< SDValue, SDValue > SplitEVL(SDValue N, EVT VecVT, const SDLoc &DL)
Split the explicit vector length parameter of a VP operation.
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getScatterVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of float type, to the float type VT, by either extending or rounding (by tr...
bool isKnownNeverNaN(SDValue Op, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getRegisterMask(const uint32_t *RegMask)
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
SDValue getCondCode(ISD::CondCode Cond)
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
LLVMContext * getContext() const
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, ISD::LoadExtType, bool IsExpanding=false)
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
SDValue getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a logical NOT operation as (XOR Val, BooleanOne).
SDValue getMaskedScatter(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, bool IsTruncating=false)
This instruction constructs a fixed permutation of two input vectors.
static bool isBitRotateMask(ArrayRef< int > Mask, unsigned EltSizeInBits, unsigned MinSubElts, unsigned MaxSubElts, unsigned &NumSubElts, unsigned &RotateAmt)
Checks if the shuffle is a bit rotation of the first operand across multiple subelements,...
VectorType * getType() const
Overload to return most specific vector type.
static void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
static bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
static bool isReverseMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask swaps the order of elements from exactly one source vector.
static bool isInsertSubvectorMask(ArrayRef< int > Mask, int NumSrcElts, int &NumSubElts, int &Index)
Return true if this shuffle mask is an insert subvector mask.
static bool isInterleaveMask(ArrayRef< int > Mask, unsigned Factor, unsigned NumInputElts, SmallVectorImpl< unsigned > &StartIndexes)
Return true if the mask interleaves one or more input vectors together.
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
static bool isSplatMask(const int *Mask, EVT VT)
ArrayRef< int > getMask() const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:132
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition SmallSet.h:175
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:181
size_t size() const
Definition SmallVector.h:78
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
This class is used to represent ISD::STORE nodes.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:51
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:149
std::string lower() const
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
StringSwitch & Cases(StringLiteral S0, StringLiteral S1, T Value)
static StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition Type.cpp:370
static TargetExtType * get(LLVMContext &Context, StringRef Name, ArrayRef< Type * > Types={}, ArrayRef< unsigned > Ints={})
Return a target extension type having the specified name and optional type and integer parameters.
Definition Type.cpp:793
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
const TargetMachine & getTargetMachine() const
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
void setOperationPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
Convenience method to set an operation to Promote and specify the type in a single call.
unsigned getMinCmpXchgSizeInBits() const
Returns the size of the smallest cmpxchg or ll/sc instruction the backend supports.
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
virtual unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual bool shouldFoldSelectWithSingleBitTest(EVT VT, const APInt &AndMask) const
virtual Value * getIRStackGuard(IRBuilderBase &IRB) const
If the target has a standard location for the stack protector guard, returns the address of that loca...
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setLibcallName(RTLIB::Libcall Call, const char *Name)
Rename the default libcall routine name for the specified libcall.
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
void setMinCmpXchgSizeInBits(unsigned SizeInBits)
Sets the minimum cmpxchg or ll/sc size supported by the backend.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
bool allowsMemoryAccessForAlignment(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
This function returns true if the memory access is aligned or if the target allows this specific unal...
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][ADD|SUB]SAT.
SDValue buildSDIVPow2WithCMov(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Build sdiv by power-of-2 with conditional move instructions Ref: "Hacker's Delight" by Henry Warren 1...
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
virtual SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, int JTI, SelectionDAG &DAG) const
Expands target specific indirect branch for the case of JumpTable expansion.
virtual InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
bool isPositionIndependent() const
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
virtual bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const
Return true if Op can create undef or poison from non-undef & non-poison operands.
Primary interface to the complete machine description for the target machine.
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
bool useTLSDESC() const
Returns true if this target uses TLS Descriptors.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetInstrInfo * getInstrInfo() const
Target - Wrapper for Target specific information.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:81
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:345
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
Definition TypeSize.h:348
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
unsigned getIntegerBitWidth() const
Type * getStructElementType(unsigned N) const
static IntegerType * getInt8Ty(LLVMContext &C)
Definition Type.cpp:249
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:342
bool isStructTy() const
True if this is an instance of StructType.
Definition Type.h:245
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:166
bool isTargetExtTy() const
Return true if this is a target extension type.
Definition Type.h:203
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition Type.h:128
bool isScalableTy() const
Return true if this is a type whose size is a known multiple of vscale.
Definition Type.cpp:61
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:224
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:255
Type * getContainedType(unsigned i) const
This method is used to implement the type iterator (defined at the end of the file).
Definition Type.h:371
A Use represents the edge between a Value definition and its users.
Definition Use.h:43
User * getUser() const
Returns the User that contains this Use.
Definition Use.h:72
Value * getOperand(unsigned i) const
Definition User.h:228
unsigned getNumOperands() const
Definition User.h:250
LLVM Value Representation.
Definition Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:534
LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.cpp:1075
Base class of all SIMD vector types.
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
Type * getElementType() const
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
Definition TypeSize.h:183
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:202
constexpr LeafTy multiplyCoefficientBy(ScalarTy RHS) const
Definition TypeSize.h:258
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:168
constexpr bool isZero() const
Definition TypeSize.h:156
self_iterator getIterator()
Definition ilist_node.h:132
#define INT64_MIN
Definition DataTypes.h:74
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ RISCV_VectorCall
Calling convention used for RISC-V V-extension.
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition CallingConv.h:50
@ SPIR_KERNEL
Used for SPIR kernel functions.
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ Tail
Attempts to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition CallingConv.h:76
@ GRAAL
Used by GraalVM. Two additional registers are reserved.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
bool isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are ~0 ...
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition ISDOpcodes.h:40
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:779
@ CTLZ_ZERO_UNDEF
Definition ISDOpcodes.h:752
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition ISDOpcodes.h:490
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition ISDOpcodes.h:44
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:257
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:573
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:743
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:246
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:813
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:497
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:205
@ GlobalAddress
Definition ISDOpcodes.h:78
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:840
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:557
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:397
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:716
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition ISDOpcodes.h:262
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:236
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition ISDOpcodes.h:418
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ GlobalTLSAddress
Definition ISDOpcodes.h:79
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:804
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition ISDOpcodes.h:684
@ STRICT_UINT_TO_FP
Definition ISDOpcodes.h:464
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:634
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition ISDOpcodes.h:751
@ VECTOR_INTERLEAVE
VECTOR_INTERLEAVE(VEC1, VEC2) - Returns two vectors with all input and output vectors having the same...
Definition ISDOpcodes.h:600
@ STEP_VECTOR
STEP_VECTOR(IMM) - Returns a scalable vector whose lanes are comprised of a linear sequence of unsign...
Definition ISDOpcodes.h:660
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition ISDOpcodes.h:521
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:356
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:756
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:218
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:641
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition ISDOpcodes.h:330
@ GET_ROUNDING
Returns current rounding mode: -1 Undefined 0 Round to 0 1 Round to nearest, ties to even 2 Round to ...
Definition ISDOpcodes.h:930
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:673
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:734
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:614
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:587
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:549
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition ISDOpcodes.h:209
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:810
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:771
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:848
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:696
@ VECTOR_REVERSE
VECTOR_REVERSE(VECTOR) - Returns a vector, of the same type as VECTOR, whose elements are shuffled us...
Definition ISDOpcodes.h:605
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:765
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition ISDOpcodes.h:463
@ STRICT_FROUNDEVEN
Definition ISDOpcodes.h:443
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition ISDOpcodes.h:135
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition ISDOpcodes.h:100
@ STRICT_FP_TO_UINT
Definition ISDOpcodes.h:457
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition ISDOpcodes.h:479
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:456
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:886
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:484
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:708
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:190
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition ISDOpcodes.h:679
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition ISDOpcodes.h:407
@ SPLAT_VECTOR_PARTS
SPLAT_VECTOR_PARTS(SCALAR1, SCALAR2, ...) - Returns a vector with the scalar values joined together a...
Definition ISDOpcodes.h:650
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:538
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:52
@ VECTOR_SPLICE
VECTOR_SPLICE(VEC1, VEC2, IMM) - Returns a subvector of the same type as VEC1/VEC2 from CONCAT_VECTOR...
Definition ISDOpcodes.h:626
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:919
@ STRICT_FNEARBYINT
Definition ISDOpcodes.h:437
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition ISDOpcodes.h:905
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:816
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:793
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition ISDOpcodes.h:507
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:347
@ VECTOR_DEINTERLEAVE
VECTOR_DEINTERLEAVE(VEC1, VEC2) - Returns two vectors with all input and output vectors having the sa...
Definition ISDOpcodes.h:594
@ TRUNCATE_SSAT_S
TRUNCATE_[SU]SAT_[SU] - Truncate for saturated operand [SU] located in middle, prefix for SAT means i...
Definition ISDOpcodes.h:831
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition ISDOpcodes.h:691
@ TRUNCATE_USAT_U
Definition ISDOpcodes.h:835
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:198
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:529
bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef.
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are 0 o...
CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
std::optional< unsigned > getVPMaskIdx(unsigned Opcode)
The operand position of the vector mask.
std::optional< unsigned > getVPExplicitVectorLengthIdx(unsigned Opcode)
The operand position of the explicit vector length parameter.
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
MemIndexType
MemIndexType enum - This enum defines how to interpret MGATHER/SCATTER's index parameter when calcula...
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
bool isBuildVectorOfConstantFPSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantFPSDNode or undef.
static const int FIRST_TARGET_STRICTFP_OPCODE
FIRST_TARGET_STRICTFP_OPCODE - Target-specific pre-isel operations which cannot raise FP exceptions s...
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
bool isVPOpcode(unsigned Opcode)
Whether this is a vector-predicated Opcode.
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Create or insert an LLVM Function declaration for an intrinsic, and return it.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ Bitcast
Perform the operation on a different, but equivalently sized type.
bool match(Val *V, const Pattern &P)
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
auto m_Undef()
Match an arbitrary undef constant.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
static VLMUL getLMul(uint64_t TSFlags)
static int getFRMOpNum(const MCInstrDesc &Desc)
static unsigned getSEWOpNum(const MCInstrDesc &Desc)
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #3 and #4) ...
int getLoadFPImm(APFloat FPImm)
getLoadFPImm - Return a 5-bit binary encoding of the floating-point immediate value.
InstSeq generateInstSeq(int64_t Val, const MCSubtargetInfo &STI)
int getIntMatCost(const APInt &Val, unsigned Size, const MCSubtargetInfo &STI, bool CompressionCost, bool FreeZeroes)
InstSeq generateTwoRegInstSeq(int64_t Val, const MCSubtargetInfo &STI, unsigned &ShiftAmt, unsigned &AddOpc)
static unsigned decodeVSEW(unsigned VSEW)
std::pair< unsigned, bool > decodeVLMUL(RISCVII::VLMUL VLMUL)
static RISCVII::VLMUL encodeLMUL(unsigned LMUL, bool Fractional)
static unsigned encodeSEW(unsigned SEW)
static constexpr unsigned FPMASK_Negative_Zero
static constexpr unsigned FPMASK_Positive_Subnormal
static constexpr unsigned FPMASK_Positive_Normal
static constexpr unsigned FPMASK_Negative_Subnormal
static constexpr unsigned FPMASK_Negative_Normal
static constexpr unsigned FPMASK_Positive_Infinity
int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIndex)
static constexpr unsigned FPMASK_Negative_Infinity
static constexpr unsigned FPMASK_Quiet_NaN
ArrayRef< MCPhysReg > getArgGPRs(const RISCVABI::ABI ABI)
static constexpr unsigned FPMASK_Signaling_NaN
static constexpr unsigned FPMASK_Positive_Zero
static constexpr unsigned RVVBitsPerBlock
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
@ Kill
The last use of a register.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition LLVMContext.h:54
@ System
Synchronized with respect to all concurrently executing threads.
Definition LLVMContext.h:57
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
@ Offset
Definition DWP.cpp:480
static const MachineMemOperand::Flags MONontemporalBit1
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
InstructionCost Cost
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:169
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2432
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:649
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
Definition bit.h:317
static const MachineMemOperand::Flags MONontemporalBit0
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:296
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
Definition Utils.cpp:1519
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:346
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition MathExtras.h:394
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:215
bool isReleaseOrStronger(AtomicOrdering AO)
static Error getOffset(const SymbolRef &Sym, SectionRef Sec, uint64_t &Result)
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition STLExtras.h:1936
constexpr bool has_single_bit(T Value) noexcept
Definition bit.h:146
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1730
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:340
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:291
T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
Definition MathExtras.h:81
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition Error.cpp:167
bool CC_RISCV_FastCC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, bool IsRet, Type *OrigTy)
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition MathExtras.h:273
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:193
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:548
bool isOneOrOneSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant 1 integer or a splatted vector of a constant 1 integer (with n...
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition MathExtras.h:403
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
CombineLevel
Definition DAGCombine.h:15
@ Xor
Bitwise or logical XOR of integers.
@ And
Bitwise or logical AND of integers.
@ SMin
Signed integer min implemented in terms of select(cmp()).
unsigned getKillRegState(bool B)
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
DWARFExpression::Operation Op
RoundingMode
Rounding mode.
@ TowardZero
roundTowardZero.
@ NearestTiesToEven
roundTiesToEven.
@ TowardPositive
roundTowardPositive.
@ NearestTiesToAway
roundTiesToAway.
@ TowardNegative
roundTowardNegative.
ArrayRef(const T &OneElt) -> ArrayRef< T >
ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
bool isAcquireOrStronger(AtomicOrdering AO)
constexpr unsigned BitWidth
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition STLExtras.h:1929
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:565
bool RISCVCCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, bool IsRet, Type *OrigTy)
RISCVCCAssignFn - This target-specific function extends the default CCValAssign with additional infor...
bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1887
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition MathExtras.h:581
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition Alignment.h:208
bool CC_RISCV(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, bool IsRet, Type *OrigTy)
llvm::SmallVector< int, 16 > createSequentialMask(unsigned Start, unsigned NumInts, unsigned NumUndefs)
Create a sequential shuffle mask.
constexpr bool isShiftedUInt(uint64_t x)
Checks if an unsigned integer is an N bit number shifted left by S.
Definition MathExtras.h:210
bool isNeutralConstant(unsigned Opc, SDNodeFlags Flags, SDValue V, unsigned OperandNo)
Returns true if V is a neutral element of Opc with Flags.
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:860
#define N
#define NC
Definition regutils.h:42
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:254
static unsigned int semanticsPrecision(const fltSemantics &)
Definition APFloat.cpp:329
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:85
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition ValueTypes.h:94
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:389
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:137
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:74
uint64_t getScalarStoreSize() const
Definition ValueTypes.h:396
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition ValueTypes.h:278
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition ValueTypes.h:294
ElementCount getVectorElementCount() const
Definition ValueTypes.h:344
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:367
unsigned getRISCVVectorTupleNumFields() const
Given a RISCV vector tuple type, return the num_fields.
Definition ValueTypes.h:358
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:379
EVT getHalfSizedIntegerVT(LLVMContext &Context) const
Finds the smallest simple value type that is greater than or equal to half the width of this EVT.
Definition ValueTypes.h:424
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:310
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:65
bool isRISCVVectorTuple() const
Return true if this is a RISC-V vector tuple value type.
Definition ValueTypes.h:179
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:375
bool isFixedLengthVector() const
Definition ValueTypes.h:181
EVT getRoundIntegerType(LLVMContext &Context) const
Rounds the bit-width of the given integer EVT up to the nearest power of two (and at least to eight),...
Definition ValueTypes.h:413
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:317
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition ValueTypes.h:174
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:322
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:157
EVT changeVectorElementType(EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition ValueTypes.h:102
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:330
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition ValueTypes.h:302
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:152
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
static KnownBits urem(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for urem(LHS, RHS).
bool isUnknown() const
Returns true if we don't know any bits.
Definition KnownBits.h:62
unsigned countMaxTrailingZeros() const
Returns the maximum number of trailing zero bits possible.
Definition KnownBits.h:263
KnownBits trunc(unsigned BitWidth) const
Return known bits for a truncation of the value we're tracking.
Definition KnownBits.h:150
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:40
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition KnownBits.h:161
void resetAll()
Resets the known state of all bits.
Definition KnownBits.h:70
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known ...
Definition KnownBits.h:285
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition KnownBits.h:300
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition KnownBits.h:169
static KnownBits udiv(const KnownBits &LHS, const KnownBits &RHS, bool Exact=false)
Compute known bits for udiv(LHS, RHS).
unsigned countMaxLeadingZeros() const
Returns the maximum number of leading zero bits possible.
Definition KnownBits.h:269
static KnownBits shl(const KnownBits &LHS, const KnownBits &RHS, bool NUW=false, bool NSW=false, bool ShAmtNonZero=false)
Compute known bits for shl(LHS, RHS).
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
BitVector getReservedRegs(const MachineFunction &MF) const override
Register getFrameRegister(const MachineFunction &MF) const override
These are IR-level optimization flags that may be propagated to SDNodes.
void intersectWith(const SDNodeFlags Flags)
Clear any flags in this flag set that aren't also set in Flags.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
SmallVector< ISD::OutputArg, 32 > Outs
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT, bool Value=true)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...